From 28583bbf42ad95c10f066c5a25cd64d7740f039d Mon Sep 17 00:00:00 2001 From: awolverp Date: Sun, 17 May 2026 18:25:52 +0330 Subject: [PATCH 01/60] ... --- Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 3c1e8c7..f5dabc8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -16,7 +16,7 @@ checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" [[package]] name = "cachebox" -version = "5.2.1" +version = "5.2.3" dependencies = [ "cfg-if", "fastrand", From e6a5c9d6f669e82c8f95d4ba696e88764b81e4f8 Mon Sep 17 00:00:00 2001 From: awolverp Date: Sun, 17 May 2026 18:27:09 +0330 Subject: [PATCH 02/60] Start refactoring to v6 My targets: - Use the new version of dependencies and the new advantages of them - Use the customized hashbrown's raw - Remove the Python layer of cache implementations, due to PyO3 updates which allows us to have __init__ method. - Use less unsafe codes ( I try to, actually ) - Use rust nightly features to optimize code - Fix some bugs - And make the base ready for async implementations --- Cargo.lock | 152 +- Cargo.toml | 44 +- LICENSE | 2 +- LICENSE-THIRD-PARTY | 70 + cachebox/__init__.py | 1 + .../cachebox/py.typed => cachebox/_core.pyi | 0 pyproject.toml | 2 - python/cachebox/__init__.py | 26 - python/cachebox/_cachebox.py | 2198 -------- python/cachebox/_core.pyi | 83 - python/cachebox/utils.py | 599 --- python/tests/conftest.py | 30 - python/tests/mixin.py | 570 --- python/tests/test_caches.py | 612 --- python/tests/test_concurrency.py | 108 - python/tests/test_utils.py | 415 -- rustfmt.toml | 1 + src/bridge/cache.rs | 315 -- src/bridge/fifocache.rs | 349 -- src/bridge/lfucache.rs | 377 -- src/bridge/lrucache.rs | 363 -- src/bridge/mod.rs | 111 - src/bridge/rrcache.rs | 297 -- src/bridge/ttlcache.rs | 375 -- src/bridge/vttlcache.rs | 373 -- src/common.rs | 594 --- src/hashbrown/alloc.rs | 13 + src/hashbrown/control/bitmask.rs | 107 + src/hashbrown/control/group/generic.rs | 152 
+ src/hashbrown/control/group/lsx.rs | 124 + src/hashbrown/control/group/mod.rs | 47 + src/hashbrown/control/group/neon.rs | 119 + src/hashbrown/control/group/sse2.rs | 143 + src/hashbrown/control/mod.rs | 10 + src/hashbrown/control/tag.rs | 82 + src/hashbrown/mod.rs | 37 + src/hashbrown/raw.rs | 4558 +++++++++++++++++ src/hashbrown/scopeguard.rs | 72 + src/hashbrown/util.rs | 9 + src/internal/alias.rs | 13 + src/internal/genver.rs | 27 + src/internal/mod.rs | 5 + src/internal/onceinit.rs | 77 + src/internal/pickle.rs | 73 + src/internal/utils.rs | 18 + src/lazyheap.rs | 190 - src/lib.rs | 62 +- src/linked_list.rs | 206 - .../tests/__init__.py => src/macro_rules.rs | 0 src/policies/fifo.rs | 497 -- src/policies/lfu.rs | 428 -- src/policies/lru.rs | 420 -- src/policies/mod.rs | 7 - src/policies/nopolicy.rs | 360 -- src/policies/random.rs | 391 -- src/policies/ttl.rs | 770 --- src/policies/vttl.rs | 597 --- src/pyclasses/base.rs | 0 src/pyclasses/mod.rs | 0 59 files changed, 5834 insertions(+), 11847 deletions(-) create mode 100644 LICENSE-THIRD-PARTY create mode 100644 cachebox/__init__.py rename python/cachebox/py.typed => cachebox/_core.pyi (100%) delete mode 100644 python/cachebox/__init__.py delete mode 100644 python/cachebox/_cachebox.py delete mode 100644 python/cachebox/_core.pyi delete mode 100644 python/cachebox/utils.py delete mode 100644 python/tests/conftest.py delete mode 100644 python/tests/mixin.py delete mode 100644 python/tests/test_caches.py delete mode 100644 python/tests/test_concurrency.py delete mode 100644 python/tests/test_utils.py create mode 100644 rustfmt.toml delete mode 100644 src/bridge/cache.rs delete mode 100644 src/bridge/fifocache.rs delete mode 100644 src/bridge/lfucache.rs delete mode 100644 src/bridge/lrucache.rs delete mode 100644 src/bridge/mod.rs delete mode 100644 src/bridge/rrcache.rs delete mode 100644 src/bridge/ttlcache.rs delete mode 100644 src/bridge/vttlcache.rs delete mode 100644 src/common.rs create mode 100644 
src/hashbrown/alloc.rs create mode 100644 src/hashbrown/control/bitmask.rs create mode 100644 src/hashbrown/control/group/generic.rs create mode 100644 src/hashbrown/control/group/lsx.rs create mode 100644 src/hashbrown/control/group/mod.rs create mode 100644 src/hashbrown/control/group/neon.rs create mode 100644 src/hashbrown/control/group/sse2.rs create mode 100644 src/hashbrown/control/mod.rs create mode 100644 src/hashbrown/control/tag.rs create mode 100644 src/hashbrown/mod.rs create mode 100644 src/hashbrown/raw.rs create mode 100644 src/hashbrown/scopeguard.rs create mode 100644 src/hashbrown/util.rs create mode 100644 src/internal/alias.rs create mode 100644 src/internal/genver.rs create mode 100644 src/internal/mod.rs create mode 100644 src/internal/onceinit.rs create mode 100644 src/internal/pickle.rs create mode 100644 src/internal/utils.rs delete mode 100644 src/lazyheap.rs delete mode 100644 src/linked_list.rs rename python/tests/__init__.py => src/macro_rules.rs (100%) delete mode 100644 src/policies/fifo.rs delete mode 100644 src/policies/lfu.rs delete mode 100644 src/policies/lru.rs delete mode 100644 src/policies/random.rs delete mode 100644 src/policies/ttl.rs delete mode 100644 src/policies/vttl.rs create mode 100644 src/pyclasses/base.rs create mode 100644 src/pyclasses/mod.rs diff --git a/Cargo.lock b/Cargo.lock index f5dabc8..e879d6b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,26 +2,21 @@ # It is not intended for manual editing. 
version = 4 -[[package]] -name = "autocfg" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" - [[package]] name = "bitflags" -version = "2.9.0" +version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" +checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" [[package]] name = "cachebox" +<<<<<<< Updated upstream version = "5.2.3" +======= +version = "6.0.0" +>>>>>>> Stashed changes dependencies = [ "cfg-if", - "fastrand", - "hashbrown", - "mimalloc", "parking_lot", "pyo3", "pyo3-build-config", @@ -29,9 +24,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.40" +version = "1.2.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1d05d92f4b1fd76aad469d46cdd858ca761576082cd37df81416691e50199fb" +checksum = "43c5703da9466b66a946814e1adf53ea2c90f10063b86290cc9eb67ce3478a20" dependencies = [ "find-msvc-tools", "shlex", @@ -39,27 +34,15 @@ dependencies = [ [[package]] name = "cfg-if" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" - -[[package]] -name = "fastrand" -version = "2.3.0" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "find-msvc-tools" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127" - -[[package]] -name = "hashbrown" -version = "0.14.5" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" [[package]] name = "heck" @@ -67,27 +50,11 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" -[[package]] -name = "indoc" -version = "2.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" - [[package]] name = "libc" -version = "0.2.171" +version = "0.2.186" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" - -[[package]] -name = "libmimalloc-sys" -version = "0.1.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "667f4fec20f29dfc6bc7357c582d91796c169ad7e2fce709468aefeb2c099870" -dependencies = [ - "cc", - "libc", -] +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" [[package]] name = "lock_api" @@ -98,29 +65,11 @@ dependencies = [ "scopeguard", ] -[[package]] -name = "memoffset" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" -dependencies = [ - "autocfg", -] - -[[package]] -name = "mimalloc" -version = "0.1.48" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1ee66a4b64c74f4ef288bcbb9192ad9c3feaad75193129ac8509af543894fd8" -dependencies = [ - "libmimalloc-sys", -] - [[package]] name = "once_cell" -version = "1.21.3" +version = "1.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" [[package]] name = "parking_lot" @@ -147,41 +96,38 @@ 
dependencies = [ [[package]] name = "portable-atomic" -version = "1.11.0" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" [[package]] name = "proc-macro2" -version = "1.0.94" +version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a31971752e70b8b2686d7e46ec17fb38dad4051d94024c88df49b667caea9c84" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" dependencies = [ "unicode-ident", ] [[package]] name = "pyo3" -version = "0.27.1" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37a6df7eab65fc7bee654a421404947e10a0f7085b6951bf2ea395f4659fb0cf" +checksum = "91fd8e38a3b50ed1167fb981cd6fd60147e091784c427b8f7183a7ee32c31c12" dependencies = [ - "indoc", "libc", - "memoffset", "once_cell", "portable-atomic", "pyo3-build-config", "pyo3-ffi", "pyo3-macros", - "unindent", ] [[package]] name = "pyo3-build-config" -version = "0.27.1" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f77d387774f6f6eec64a004eac0ed525aab7fa1966d94b42f743797b3e395afb" +checksum = "e368e7ddfdeb98c9bca7f8383be1648fd84ab466bf2bc015e94008db6d35611e" dependencies = [ "python3-dll-a", "target-lexicon", @@ -189,9 +135,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.27.1" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dd13844a4242793e02df3e2ec093f540d948299a6a77ea9ce7afd8623f542be" +checksum = "7f29e10af80b1f7ccaf7f69eace800a03ecd13e883acfacc1e5d0988605f651e" dependencies = [ "libc", "pyo3-build-config", @@ -199,9 +145,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.27.1" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "eaf8f9f1108270b90d3676b8679586385430e5c0bb78bb5f043f95499c821a71" +checksum = "df6e520eff47c45997d2fc7dd8214b25dd1310918bbb2642156ef66a67f29813" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -211,9 +157,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.27.1" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70a3b2274450ba5288bc9b8c1b69ff569d1d61189d4bff38f8d22e03d17f932b" +checksum = "c4cdc218d835738f81c2338f822078af45b4afdf8b2e33cbb5916f108b813acb" dependencies = [ "heck", "proc-macro2", @@ -224,27 +170,27 @@ dependencies = [ [[package]] name = "python3-dll-a" -version = "0.2.14" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d381ef313ae70b4da5f95f8a4de773c6aa5cd28f73adec4b4a31df70b66780d8" +checksum = "d80ba7540edb18890d444c5aa8e1f1f99b1bdf26fb26ae383135325f4a36042b" dependencies = [ "cc", ] [[package]] name = "quote" -version = "1.0.40" +version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" dependencies = [ "proc-macro2", ] [[package]] name = "redox_syscall" -version = "0.5.10" +version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b8c0c260b63a8219631167be35e6a988e9554dbd323f8bd08439c8ed1302bd1" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ "bitflags", ] @@ -263,15 +209,15 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "smallvec" -version = "1.14.0" +version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd" +checksum = 
"67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" [[package]] name = "syn" -version = "2.0.100" +version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", "quote", @@ -280,24 +226,18 @@ dependencies = [ [[package]] name = "target-lexicon" -version = "0.13.2" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e502f78cdbb8ba4718f566c418c52bc729126ffd16baee5baa718cf25dd5a69a" +checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca" [[package]] name = "unicode-ident" -version = "1.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" - -[[package]] -name = "unindent" -version = "0.2.4" +version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" [[package]] name = "windows-link" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45e46c0661abb7180e7b9c281db115305d49ca1709ab8242adf09666d2173c65" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" diff --git a/Cargo.toml b/Cargo.toml index 15a7461..b69d46a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cachebox" -version = "5.2.3" +version = "6.0.0" edition = "2021" description = "The fastest memoizing and caching Python library written in Rust" readme = "README.md" @@ -11,7 +11,7 @@ authors = ["awolverp"] [lib] name = "cachebox" -crate-type = ["cdylib"] +crate-type = ["cdylib", "rlib"] [profile.release] codegen-units = 1 @@ -19,38 
+19,20 @@ debug = false incremental = false lto = true panic = "abort" -strip = "symbols" +strip = true -[features] -mimalloc = ["dep:mimalloc"] - -[dependencies.hashbrown] -version = "0.14.5" -default-features = false -features = ["inline-more", "raw"] - -[dependencies.fastrand] -version = "2.3.0" - -[dependencies.pyo3] -version = "0.27.1" -default-features = false -features = ["macros", "extension-module", "generate-import-lib"] +[dependencies] +cfg-if = "1.0.4" +parking_lot = {version="0.12.5", default-features=false} +pyo3 = {version="0.28.3", default-features=false, features=["macros", "generate-import-lib"]} -[dependencies.cfg-if] -version = "1.0.3" +[build-dependencies] +pyo3-build-config = {version="0.28.3", default-features=false, features=["resolve-config"]} -[dependencies.parking_lot] -version = "0.12.5" - -[dependencies.mimalloc] -version = "0.1.48" -features = ["v3", "override", "local_dynamic_tls"] -optional = true - -[build-dependencies.pyo3-build-config] -version = "0.27.1" -features = ["resolve-config"] +[features] +default = ["inline-more", "extension-module"] +inline-more = [] +extension-module = ["pyo3/extension-module"] [lints.clippy] dbg_macro = "warn" diff --git a/LICENSE b/LICENSE index 1b08669..e5f48c2 100644 --- a/LICENSE +++ b/LICENSE @@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. \ No newline at end of file +SOFTWARE. 
diff --git a/LICENSE-THIRD-PARTY b/LICENSE-THIRD-PARTY new file mode 100644 index 0000000..dc6cf36 --- /dev/null +++ b/LICENSE-THIRD-PARTY @@ -0,0 +1,70 @@ +# Third-Party Licenses + +This project includes code from the following third-party sources: + +--- + +## hashbrown + +Repository: https://github.com/rust-lang/hashbrown +License: MIT OR Apache-2.0 + +### MIT License + +Copyright (c) 2016 Amanieu d'Antras + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
+ +--- + +### Apache License 2.0 + +Copyright (c) 2016 Amanieu d'Antras + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
+ +--- diff --git a/cachebox/__init__.py b/cachebox/__init__.py new file mode 100644 index 0000000..fa5bd09 --- /dev/null +++ b/cachebox/__init__.py @@ -0,0 +1 @@ +from ._core import Cache diff --git a/python/cachebox/py.typed b/cachebox/_core.pyi similarity index 100% rename from python/cachebox/py.typed rename to cachebox/_core.pyi diff --git a/pyproject.toml b/pyproject.toml index d2b779e..9200205 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,6 @@ classifiers = [ "Programming Language :: Python :: Implementation :: PyPy", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", @@ -47,7 +46,6 @@ Homepage = 'https://github.com/awolverp/cachebox' asyncio_default_fixture_loop_scope = "function" [tool.maturin] -python-source = "python" features = ["pyo3/extension-module"] module-name = "cachebox._core" diff --git a/python/cachebox/__init__.py b/python/cachebox/__init__.py deleted file mode 100644 index 3438d0c..0000000 --- a/python/cachebox/__init__.py +++ /dev/null @@ -1,26 +0,0 @@ -from ._core import ( - __author__ as __author__, - __version__ as __version__, -) -from ._cachebox import ( - BaseCacheImpl as BaseCacheImpl, - Cache as Cache, - FIFOCache as FIFOCache, - RRCache as RRCache, - LRUCache as LRUCache, - LFUCache as LFUCache, - TTLCache as TTLCache, - VTTLCache as VTTLCache, - IteratorView as IteratorView, -) -from .utils import ( - Frozen as Frozen, - cached as cached, - cachedmethod as cachedmethod, - make_key as make_key, - make_hash_key as make_hash_key, - make_typed_key as make_typed_key, - EVENT_HIT as EVENT_HIT, - EVENT_MISS as EVENT_MISS, - is_cached as is_cached, -) diff --git a/python/cachebox/_cachebox.py b/python/cachebox/_cachebox.py deleted file mode 100644 index fb47917..0000000 --- a/python/cachebox/_cachebox.py +++ /dev/null @@ 
-1,2198 +0,0 @@ -import copy as _std_copy -import typing -from datetime import datetime, timedelta - -from . import _core -from ._core import BaseCacheImpl - -KT = typing.TypeVar("KT") -VT = typing.TypeVar("VT") -DT = typing.TypeVar("DT") - - -def _items_to_str(items: typing.Iterable[typing.Any], length) -> str: - if length <= 50: - return "{" + ", ".join(f"{k!r}: {v!r}" for k, v in items) + "}" - - c = 0 - left = [] - - while c < length: - k, v = next(items) # type: ignore[call-overload] - - if c <= 50: - left.append(f"{k!r}: {v!r}") - - else: - break - - c += 1 - - return "{%s, ... %d more ...}" % (", ".join(left), length - c) - - -class IteratorView(typing.Generic[VT]): - __slots__ = ("iterator", "func") - - def __init__(self, iterator, func: typing.Callable[[tuple], typing.Any]): - self.iterator = iterator - self.func = func - - def __iter__(self): - self.iterator = self.iterator.__iter__() - return self - - def __next__(self) -> VT: - return self.func(self.iterator.__next__()) - - -class Cache(BaseCacheImpl[KT, VT]): - """ - A thread-safe, memory-efficient hashmap-like cache with configurable maximum size. - - Provides a flexible key-value storage mechanism with: - - Configurable maximum size (zero means unlimited) - - Lower memory usage compared to standard dict - - Thread-safe operations - - Useful memory management methods - - Differs from standard dict by: - - Being thread-safe - - Unordered storage - - Size limitation - - Memory efficiency - - Additional cache management methods - - Supports initialization with optional initial data and capacity, - and provides dictionary-like access with additional cache-specific operations. - """ - - __slots__ = ("_raw",) - - def __init__( - self, - maxsize: int, - iterable: typing.Union[dict, typing.Iterable[tuple], None] = None, - *, - capacity: int = 0, - maxmemory: int = 0, - ) -> None: - """ - Initialize a new Cache instance. - - Args: - maxsize (int): Maximum number of elements the cache can hold. 
Zero means unlimited. - iterable (Union[Cache, dict, tuple, Generator, None], optional): Initial data to populate the cache. Defaults to None. - capacity (int, optional): Pre-allocate hash table capacity to minimize reallocations. Defaults to 0. - maxmemory (int, optional): Maximum memory (bytes) allowed for cached entries. Zero means unlimited. - On PyPy, it works same as `maxsize` if objects do not support `__sizeof__` - method. - - Creates a new cache with specified size constraints and optional initial data. The cache can be pre-sized - to improve performance when the number of expected elements is known in advance. - """ - self._raw = _core.Cache(maxsize, capacity=capacity, maxmemory=maxmemory) - - if iterable is not None: - self.update(iterable) - - @property - def maxsize(self) -> int: - return self._raw.maxsize() - - @property - def maxmemory(self) -> int: - return self._raw.maxmemory() - - def capacity(self) -> int: - """Returns the number of elements the map can hold without reallocating.""" - return self._raw.capacity() - - def memory(self) -> int: - """Returns the total estimated memory usage of cached entries in bytes.""" - return self._raw.memory() - - def __len__(self) -> int: - return len(self._raw) - - def __sizeof__(self): # pragma: no cover - return self._raw.__sizeof__() - - def __contains__(self, key: KT) -> bool: - return key in self._raw - - def __bool__(self) -> bool: - return not self.is_empty() - - def is_empty(self) -> bool: - return self._raw.is_empty() - - def is_full(self) -> bool: - return self._raw.is_full() - - def insert(self, key: KT, value: VT) -> typing.Optional[VT]: - """ - Equals to `self[key] = value`, but returns a value: - - - If the cache did not have this key present, None is returned. - - If the cache did have this key present, the value is updated, - and the old value is returned. 
The key is not updated, though; - - Note: raises `OverflowError` if the cache reached the maxsize limit, - because this class does not have any algorithm. - """ - return self._raw.insert(key, value) - - def get(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Retrieves the value for a given key from the cache. - - Returns the value associated with the key if present, otherwise returns the specified default value. - Equivalent to `self[key]`, but provides a fallback default if the key is not found. - - Args: - key: The key to look up in the cache. - default: The value to return if the key is not present in the cache. Defaults to None. - - Returns: - The value associated with the key, or the default value if the key is not found. - """ - try: - return self._raw.get(key) - except _core.CoreKeyError: - return default # type: ignore[return-value] - - def pop(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Removes specified key and return the corresponding value. If the key is not found, returns the `default`. - """ - try: - return self._raw.remove(key) - except _core.CoreKeyError: - return default # type: ignore[return-value] - - def setdefault(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Inserts key with a value of default if key is not in the cache. Return the value for key if key is - in the cache, else `default`. - """ - return self._raw.setdefault(key, default) - - def popitem(self) -> typing.NoReturn: # pragma: no cover - raise NotImplementedError() - - def drain(self, n: int) -> typing.NoReturn: # pragma: no cover - raise NotImplementedError() - - def update(self, iterable: typing.Union[dict, typing.Iterable[tuple]]) -> None: - """ - Updates the cache with elements from a dictionary or an iterable object of key/value pairs. - - Note: raises `OverflowError` if the cache reached the maxsize limit. 
- """ - if hasattr(iterable, "items"): - iterable = iterable.items() - - self._raw.update(iterable) - - def __setitem__(self, key: KT, value: VT) -> None: - self.insert(key, value) - - def __getitem__(self, key: KT) -> VT: - try: - return self._raw.get(key) - except _core.CoreKeyError: - raise KeyError(key) from None - - def __delitem__(self, key: KT) -> None: - try: - self._raw.remove(key) - except _core.CoreKeyError: - raise KeyError(key) from None - - def __eq__(self, other) -> bool: - if not isinstance(other, Cache): - return False # pragma: no cover - - return self._raw == other._raw - - def __ne__(self, other) -> bool: - if not isinstance(other, Cache): - return False # pragma: no cover - - return self._raw != other._raw - - def shrink_to_fit(self) -> None: - """Shrinks the cache to fit len(self) elements.""" - self._raw.shrink_to_fit() - - def clear(self, *, reuse: bool = False) -> None: - """ - Removes all items from cache. - - If reuse is True, will not free the memory for reusing in the future. - """ - self._raw.clear(reuse) - - def items(self) -> IteratorView[typing.Tuple[KT, VT]]: - """ - Returns an iterable object of the cache's items (key-value pairs). - - Notes: - - You should not make any changes in cache while using this iterable object. - - Items are not ordered. - """ - return IteratorView(self._raw.items(), lambda x: x) - - def keys(self) -> IteratorView[KT]: - """ - Returns an iterable object of the cache's keys. - - Notes: - - You should not make any changes in cache while using this iterable object. - - Keys are not ordered. - """ - return IteratorView(self._raw.items(), lambda x: x[0]) - - def values(self) -> IteratorView[VT]: - """ - Returns an iterable object of the cache's values. - - Notes: - - You should not make any changes in cache while using this iterable object. - - Values are not ordered. 
- """ - return IteratorView(self._raw.items(), lambda x: x[1]) - - def copy(self) -> "Cache[KT, VT]": - """Returns a shallow copy of the cache""" - return self.__copy__() - - def __copy__(self) -> "Cache[KT, VT]": - cls = type(self) - copied = cls.__new__(cls) - copied._raw = _std_copy.copy(self._raw) - return copied - - def __deepcopy__(self, memo) -> "Cache[KT, VT]": - cls = type(self) - copied = cls.__new__(cls) - copied._raw = _std_copy.deepcopy(self._raw, memo) - return copied - - def __iter__(self) -> IteratorView[KT]: - return self.keys() - - def __repr__(self) -> str: - cls = type(self) - - return "%s.%s[%d/%d](%s)" % ( - cls.__module__, - cls.__name__, - len(self._raw), - self._raw.maxsize(), - _items_to_str(self._raw.items(), len(self._raw)), - ) - - -class FIFOCache(BaseCacheImpl[KT, VT]): - """ - A First-In-First-Out (FIFO) cache implementation with configurable maximum size and optional initial capacity. - - This cache provides a fixed-size container that automatically removes the oldest items when the maximum size is reached. - Supports various operations like insertion, retrieval, deletion, and iteration. - - Attributes: - maxsize: The maximum number of items the cache can hold. - capacity: The initial capacity of the cache before resizing. - - Key features: - - Deterministic item eviction order (oldest items removed first) - - Efficient key-value storage and retrieval - - Supports dictionary-like operations - - Allows optional initial data population - """ - - __slots__ = ("_raw",) - - def __init__( - self, - maxsize: int, - iterable: typing.Union[typing.Union[dict, typing.Iterable[tuple]], None] = None, - *, - capacity: int = 0, - maxmemory: int = 0, - ) -> None: - """ - Initialize a new FIFOCache instance. - - Args: - maxsize: The maximum number of items the cache can hold. - iterable: Optional initial data to populate the cache. Can be another FIFOCache, - a dictionary, tuple, generator, or None. 
- capacity: Optional initial capacity of the cache before resizing. Defaults to 0. - maxmemory: Maximum memory (bytes) allowed for cached entries. Zero means unlimited. - When maxmemory is set, updating an existing key can evict the updated key - if it is the oldest entry. - """ - self._raw = _core.FIFOCache(maxsize, capacity=capacity, maxmemory=maxmemory) - - if iterable is not None: - self.update(iterable) - - @property - def maxsize(self) -> int: - return self._raw.maxsize() - - @property - def maxmemory(self) -> int: - return self._raw.maxmemory() - - def capacity(self) -> int: - """Returns the number of elements the map can hold without reallocating.""" - return self._raw.capacity() - - def memory(self) -> int: - """Returns the total estimated memory usage of cached entries in bytes.""" - return self._raw.memory() - - def __len__(self) -> int: - return len(self._raw) - - def __sizeof__(self): # pragma: no cover - return self._raw.__sizeof__() - - def __contains__(self, key: KT) -> bool: - return key in self._raw - - def __bool__(self) -> bool: - return not self.is_empty() - - def is_empty(self) -> bool: - return self._raw.is_empty() - - def is_full(self) -> bool: - return self._raw.is_full() - - def insert(self, key: KT, value: VT) -> typing.Optional[VT]: - """ - Inserts a key-value pair into the cache, returning the previous value if the key existed. - - Equivalent to `self[key] = value`, but with additional return value semantics: - - - If the key was not previously in the cache, returns None. - - If the key was already present, updates the value and returns the old value. - The key itself is not modified. - - Args: - key: The key to insert. - value: The value to associate with the key. - - Returns: - The previous value associated with the key, or None if the key was not present. 
- """ - return self._raw.insert(key, value) - - def get(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Retrieves the value for a given key from the cache. - - Returns the value associated with the key if present, otherwise returns the specified default value. - Equivalent to `self[key]`, but provides a fallback default if the key is not found. - - Args: - key: The key to look up in the cache. - default: The value to return if the key is not present in the cache. Defaults to None. - - Returns: - The value associated with the key, or the default value if the key is not found. - """ - try: - return self._raw.get(key) - except _core.CoreKeyError: - return default # type: ignore[return-value] - - def pop(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Removes specified key and return the corresponding value. If the key is not found, returns the `default`. - """ - try: - return self._raw.remove(key) - except _core.CoreKeyError: - return default # type: ignore[return-value] # type: ignore[return-value] - - def setdefault(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Inserts key with a value of default if key is not in the cache. - - Return the value for key if key is in the cache, else default. 
- """ - return self._raw.setdefault(key, default) - - def popitem(self) -> typing.Tuple[KT, VT]: - """Removes the element that has been in the cache the longest.""" - try: - return self._raw.popitem() - except _core.CoreKeyError: - raise KeyError() from None - - def drain(self, n: int) -> int: # pragma: no cover - """Does the `popitem()` `n` times and returns count of removed items.""" - if n <= 0: - return 0 - - for i in range(n): - try: - self._raw.popitem() - except _core.CoreKeyError: - return i - - return i - - def update(self, iterable: typing.Union[dict, typing.Iterable[tuple]]) -> None: - """Updates the cache with elements from a dictionary or an iterable object of key/value pairs.""" - if hasattr(iterable, "items"): - iterable = iterable.items() - - self._raw.update(iterable) - - def __setitem__(self, key: KT, value: VT) -> None: - self.insert(key, value) - - def __getitem__(self, key: KT) -> VT: - try: - return self._raw.get(key) - except _core.CoreKeyError: - raise KeyError(key) from None - - def __delitem__(self, key: KT) -> None: - try: - self._raw.remove(key) - except _core.CoreKeyError: - raise KeyError(key) from None - - def __eq__(self, other) -> bool: - if not isinstance(other, FIFOCache): - return False # pragma: no cover - - return self._raw == other._raw - - def __ne__(self, other) -> bool: - if not isinstance(other, FIFOCache): - return False # pragma: no cover - - return self._raw != other._raw - - def shrink_to_fit(self) -> None: - """Shrinks the cache to fit len(self) elements.""" - self._raw.shrink_to_fit() - - def clear(self, *, reuse: bool = False) -> None: - """ - Removes all items from cache. - - If reuse is True, will not free the memory for reusing in the future. - """ - self._raw.clear(reuse) - - def items(self) -> IteratorView[typing.Tuple[KT, VT]]: - """ - Returns an iterable object of the cache's items (key-value pairs). - - Notes: - - You should not make any changes in cache while using this iterable object. 
- """ - return IteratorView(self._raw.items(), lambda x: x) - - def keys(self) -> IteratorView[KT]: - """ - Returns an iterable object of the cache's keys. - - Notes: - - You should not make any changes in cache while using this iterable object. - """ - return IteratorView(self._raw.items(), lambda x: x[0]) - - def values(self) -> IteratorView[VT]: - """ - Returns an iterable object of the cache's values. - - Notes: - - You should not make any changes in cache while using this iterable object. - """ - return IteratorView(self._raw.items(), lambda x: x[1]) - - def first(self, n: int = 0) -> typing.Optional[KT]: - """ - Returns the first key in cache; this is the one which will be removed by `popitem()` (if n == 0). - - By using `n` parameter, you can browse order index by index. - """ - if n < 0: - n = len(self._raw) + n - - if n < 0: - return None - - return self._raw.get_index(n) - - def last(self) -> typing.Optional[KT]: - """ - Returns the last key in cache. Equals to `self.first(-1)`. - """ - return self._raw.get_index(len(self._raw) - 1) - - def copy(self) -> "FIFOCache[KT, VT]": - """Returns a shallow copy of the cache""" - return self.__copy__() - - def __copy__(self) -> "FIFOCache[KT, VT]": - cls = type(self) - copied = cls.__new__(cls) - copied._raw = _std_copy.copy(self._raw) - return copied - - def __deepcopy__(self, memo) -> "FIFOCache[KT, VT]": - cls = type(self) - copied = cls.__new__(cls) - copied._raw = _std_copy.deepcopy(self._raw, memo) - return copied - - def __iter__(self) -> IteratorView[KT]: - return self.keys() - - def __repr__(self) -> str: - cls = type(self) - - return "%s.%s[%d/%d](%s)" % ( - cls.__module__, - cls.__name__, - len(self._raw), - self._raw.maxsize(), - _items_to_str(self._raw.items(), len(self._raw)), - ) - - -class RRCache(BaseCacheImpl[KT, VT]): - """ - A thread-safe cache implementation with Random Replacement (RR) policy. 
- - This cache randomly selects and removes elements when the cache reaches its maximum size, - ensuring a simple and efficient caching mechanism with configurable capacity. - - Supports operations like insertion, retrieval, deletion, and iteration. - """ - - __slots__ = ("_raw",) - - def __init__( - self, - maxsize: int, - iterable: typing.Union[typing.Union[dict, typing.Iterable[tuple]], None] = None, - *, - capacity: int = 0, - maxmemory: int = 0, - ) -> None: - """ - Initialize a new RRCache instance. - - Args: - maxsize (int): Maximum size of the cache. A value of zero means unlimited capacity. - iterable (dict or Iterable[tuple], optional): Initial data to populate the cache. Defaults to None. - capacity (int, optional): Preallocated capacity for the cache to minimize reallocations. Defaults to 0. - maxmemory (int, optional): Maximum memory (bytes) allowed for cached entries. Zero means unlimited. - When maxmemory is set, updates can evict any key, including the updated key. - On PyPy. In PyPy, the size of each object is assumed to be 1 if the object - does not have a `__sizeof__` method. - - Note: - - The cache size limit is immutable after initialization. - - If an iterable is provided, the cache will be populated using the update method. 
- """ - self._raw = _core.RRCache(maxsize, capacity=capacity, maxmemory=maxmemory) - - if iterable is not None: - self.update(iterable) - - @property - def maxsize(self) -> int: - return self._raw.maxsize() - - @property - def maxmemory(self) -> int: - return self._raw.maxmemory() - - def capacity(self) -> int: - """Returns the number of elements the map can hold without reallocating.""" - return self._raw.capacity() - - def memory(self) -> int: - """Returns the total estimated memory usage of cached entries in bytes.""" - return self._raw.memory() - - def __len__(self) -> int: - return len(self._raw) - - def __sizeof__(self): # pragma: no cover - return self._raw.__sizeof__() - - def __contains__(self, key: KT) -> bool: - return key in self._raw - - def __bool__(self) -> bool: - return not self.is_empty() - - def is_empty(self) -> bool: - return self._raw.is_empty() - - def is_full(self) -> bool: - return self._raw.is_full() - - def insert(self, key: KT, value: VT) -> typing.Optional[VT]: - """ - Inserts a key-value pair into the cache, returning the previous value if the key existed. - - Equivalent to `self[key] = value`, but with additional return value semantics: - - - If the key was not previously in the cache, returns None. - - If the key was already present, updates the value and returns the old value. - The key itself is not modified. - - Args: - key: The key to insert. - value: The value to associate with the key. - - Returns: - The previous value associated with the key, or None if the key was not present. - """ - return self._raw.insert(key, value) - - def get(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Retrieves the value for a given key from the cache. - - Returns the value associated with the key if present, otherwise returns the specified default value. - Equivalent to `self[key]`, but provides a fallback default if the key is not found. - - Args: - key: The key to look up in the cache. 
- default: The value to return if the key is not present in the cache. Defaults to None. - - Returns: - The value associated with the key, or the default value if the key is not found. - """ - try: - return self._raw.get(key) - except _core.CoreKeyError: - return default # type: ignore[return-value] - - def pop(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Removes specified key and return the corresponding value. If the key is not found, returns the `default`. - """ - try: - return self._raw.remove(key) - except _core.CoreKeyError: - return default # type: ignore[return-value] - - def setdefault(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Inserts key with a value of default if key is not in the cache. - - Return the value for key if key is in the cache, else default. - """ - return self._raw.setdefault(key, default) - - def popitem(self) -> typing.Tuple[KT, VT]: - """Randomly selects and removes a (key, value) pair from the cache.""" - try: - return self._raw.popitem() - except _core.CoreKeyError: - raise KeyError() from None - - def drain(self, n: int) -> int: # pragma: no cover - """Does the `popitem()` `n` times and returns count of removed items.""" - if n <= 0: - return 0 - - for i in range(n): - try: - self._raw.popitem() - except _core.CoreKeyError: - return i - - return i - - def update(self, iterable: typing.Union[dict, typing.Iterable[tuple]]) -> None: - """Updates the cache with elements from a dictionary or an iterable object of key/value pairs.""" - if hasattr(iterable, "items"): - iterable = iterable.items() - - self._raw.update(iterable) - - def random_key(self) -> KT: - """ - Randomly selects and returns a key from the cache. - Raises `KeyError` If the cache is empty. 
- """ - try: - return self._raw.random_key() - except _core.CoreKeyError: - raise KeyError() from None - - def __setitem__(self, key: KT, value: VT) -> None: - self.insert(key, value) - - def __getitem__(self, key: KT) -> VT: - try: - return self._raw.get(key) - except _core.CoreKeyError: - raise KeyError(key) from None - - def __delitem__(self, key: KT) -> None: - try: - self._raw.remove(key) - except _core.CoreKeyError: - raise KeyError(key) from None - - def __eq__(self, other) -> bool: - if not isinstance(other, RRCache): - return False # pragma: no cover - - return self._raw == other._raw - - def __ne__(self, other) -> bool: - if not isinstance(other, RRCache): - return False # pragma: no cover - - return self._raw != other._raw - - def shrink_to_fit(self) -> None: - """Shrinks the cache to fit len(self) elements.""" - self._raw.shrink_to_fit() - - def clear(self, *, reuse: bool = False) -> None: - """ - Removes all items from cache. - - If reuse is True, will not free the memory for reusing in the future. - """ - self._raw.clear(reuse) - - def items(self) -> IteratorView[typing.Tuple[KT, VT]]: - """ - Returns an iterable object of the cache's items (key-value pairs). - - Notes: - - You should not make any changes in cache while using this iterable object. - - Items are not ordered. - """ - return IteratorView(self._raw.items(), lambda x: x) - - def keys(self) -> IteratorView[KT]: - """ - Returns an iterable object of the cache's keys. - - Notes: - - You should not make any changes in cache while using this iterable object. - - Keys are not ordered. - """ - return IteratorView(self._raw.items(), lambda x: x[0]) - - def values(self) -> IteratorView[VT]: - """ - Returns an iterable object of the cache's values. - - Notes: - - You should not make any changes in cache while using this iterable object. - - Values are not ordered. 
- """ - return IteratorView(self._raw.items(), lambda x: x[1]) - - def copy(self) -> "RRCache[KT, VT]": - """Returns a shallow copy of the cache""" - return self.__copy__() - - def __copy__(self) -> "RRCache[KT, VT]": - cls = type(self) - copied = cls.__new__(cls) - copied._raw = _std_copy.copy(self._raw) - return copied - - def __deepcopy__(self, memo) -> "RRCache[KT, VT]": - cls = type(self) - copied = cls.__new__(cls) - copied._raw = _std_copy.deepcopy(self._raw, memo) - return copied - - def __iter__(self) -> IteratorView[KT]: - return self.keys() - - def __repr__(self) -> str: - cls = type(self) - - return "%s.%s[%d/%d](%s)" % ( - cls.__module__, - cls.__name__, - len(self._raw), - self._raw.maxsize(), - _items_to_str(self._raw.items(), len(self._raw)), - ) - - -class LRUCache(BaseCacheImpl[KT, VT]): - """ - Thread-safe Least Recently Used (LRU) cache implementation. - - Provides a cache that automatically removes the least recently used items when - the cache reaches its maximum size. Supports various operations like insertion, - retrieval, and management of cached items with configurable maximum size and - initial capacity. - - Key features: - - Configurable maximum cache size - - Optional initial capacity allocation - - Thread-safe operations - - Efficient key-value pair management - - Supports initialization from dictionaries or iterables - """ - - __slots__ = ("_raw",) - - def __init__( - self, - maxsize: int, - iterable: typing.Union[typing.Union[dict, typing.Iterable[tuple]], None] = None, - *, - capacity: int = 0, - maxmemory: int = 0, - ) -> None: - """ - Initialize a new LRU Cache instance. - - Args: - maxsize (int): Maximum size of the cache. Zero indicates unlimited size. - iterable (dict | Iterable[tuple], optional): Initial data to populate the cache. - capacity (int, optional): Pre-allocated capacity for the cache to minimize reallocations. - maxmemory (int, optional): Maximum memory (bytes) allowed for cached entries. Zero means unlimited. 
- On PyPy. In PyPy, the size of each object is assumed to be 1 if the object - does not have a `__sizeof__` method. - - Notes: - - The cache size is immutable after initialization. - - If an iterable is provided, it will be used to populate the cache. - """ - self._raw = _core.LRUCache(maxsize, capacity=capacity, maxmemory=maxmemory) - - if iterable is not None: - self.update(iterable) - - @property - def maxsize(self) -> int: - return self._raw.maxsize() - - @property - def maxmemory(self) -> int: - return self._raw.maxmemory() - - def capacity(self) -> int: - """Returns the number of elements the map can hold without reallocating.""" - return self._raw.capacity() - - def memory(self) -> int: - """Returns the total estimated memory usage of cached entries in bytes.""" - return self._raw.memory() - - def __len__(self) -> int: - return len(self._raw) - - def __sizeof__(self): # pragma: no cover - return self._raw.__sizeof__() - - def __contains__(self, key: KT) -> bool: - return key in self._raw - - def __bool__(self) -> bool: - return not self.is_empty() - - def is_empty(self) -> bool: - return self._raw.is_empty() - - def is_full(self) -> bool: - return self._raw.is_full() - - def insert(self, key: KT, value: VT) -> typing.Optional[VT]: - """ - Inserts a key-value pair into the cache, returning the previous value if the key existed. - - Equivalent to `self[key] = value`, but with additional return value semantics: - - - If the key was not previously in the cache, returns None. - - If the key was already present, updates the value and returns the old value. - The key itself is not modified. - - Args: - key: The key to insert. - value: The value to associate with the key. - - Returns: - The previous value associated with the key, or None if the key was not present. 
- """ - return self._raw.insert(key, value) - - def peek(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Searches for a key-value in the cache and returns it (without moving the key to recently used). - """ - try: - return self._raw.peek(key) - except _core.CoreKeyError: - return default # type: ignore[return-value] - - def get(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Retrieves the value for a given key from the cache. - - Returns the value associated with the key if present, otherwise returns the specified default value. - Equivalent to `self[key]`, but provides a fallback default if the key is not found. - - Args: - key: The key to look up in the cache. - default: The value to return if the key is not present in the cache. Defaults to None. - - Returns: - The value associated with the key, or the default value if the key is not found. - """ - try: - return self._raw.get(key) - except _core.CoreKeyError: - return default # type: ignore[return-value] - - def pop(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Removes specified key and return the corresponding value. If the key is not found, returns the `default`. - """ - try: - return self._raw.remove(key) - except _core.CoreKeyError: - return default # type: ignore[return-value] - - def setdefault(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Inserts key with a value of default if key is not in the cache. - - Return the value for key if key is in the cache, else default. - """ - return self._raw.setdefault(key, default) - - def popitem(self) -> typing.Tuple[KT, VT]: - """ - Removes the least recently used item from the cache and returns it as a (key, value) tuple. - Raises KeyError if the cache is empty. 
- """ - try: - return self._raw.popitem() - except _core.CoreKeyError: # pragma: no cover - raise KeyError() from None - - def drain(self, n: int) -> int: # pragma: no cover - """Does the `popitem()` `n` times and returns count of removed items.""" - if n <= 0: - return 0 - - for i in range(n): - try: - self._raw.popitem() - except _core.CoreKeyError: - return i - - return i - - def update(self, iterable: typing.Union[dict, typing.Iterable[tuple]]) -> None: - """Updates the cache with elements from a dictionary or an iterable object of key/value pairs.""" - if hasattr(iterable, "items"): - iterable = iterable.items() - - self._raw.update(iterable) - - def __setitem__(self, key: KT, value: VT) -> None: - self.insert(key, value) - - def __getitem__(self, key: KT) -> VT: - try: - return self._raw.get(key) - except _core.CoreKeyError: - raise KeyError(key) from None - - def __delitem__(self, key: KT) -> None: - try: - self._raw.remove(key) - except _core.CoreKeyError: - raise KeyError(key) from None - - def __eq__(self, other) -> bool: - if not isinstance(other, LRUCache): - return False # pragma: no cover - - return self._raw == other._raw - - def __ne__(self, other) -> bool: - if not isinstance(other, LRUCache): - return False # pragma: no cover - - return self._raw != other._raw - - def shrink_to_fit(self) -> None: - """Shrinks the cache to fit len(self) elements.""" - self._raw.shrink_to_fit() - - def clear(self, *, reuse: bool = False) -> None: - """ - Removes all items from cache. - - If reuse is True, will not free the memory for reusing in the future. - """ - self._raw.clear(reuse) - - def items(self) -> IteratorView[typing.Tuple[KT, VT]]: - """ - Returns an iterable object of the cache's items (key-value pairs). - - Notes: - - You should not make any changes in cache while using this iterable object. - """ - return IteratorView(self._raw.items(), lambda x: x) - - def keys(self) -> IteratorView[KT]: - """ - Returns an iterable object of the cache's keys. 
- - Notes: - - You should not make any changes in cache while using this iterable object. - """ - return IteratorView(self._raw.items(), lambda x: x[0]) - - def values(self) -> IteratorView[VT]: - """ - Returns an iterable object of the cache's values. - - Notes: - - You should not make any changes in cache while using this iterable object. - """ - return IteratorView(self._raw.items(), lambda x: x[1]) - - def least_recently_used(self) -> typing.Optional[KT]: - """ - Returns the key in the cache that has not been accessed in the longest time. - """ - return self._raw.least_recently_used() - - def most_recently_used(self) -> typing.Optional[KT]: - """ - Returns the key in the cache that has been accessed in the shortest time. - """ - return self._raw.most_recently_used() - - def copy(self) -> "LRUCache[KT, VT]": - """Returns a shallow copy of the cache""" - return self.__copy__() - - def __copy__(self) -> "LRUCache[KT, VT]": - cls = type(self) - copied = cls.__new__(cls) - copied._raw = _std_copy.copy(self._raw) - return copied - - def __deepcopy__(self, memo) -> "LRUCache[KT, VT]": - cls = type(self) - copied = cls.__new__(cls) - copied._raw = _std_copy.deepcopy(self._raw, memo) - return copied - - def __iter__(self) -> IteratorView[KT]: - return self.keys() - - def __repr__(self) -> str: - cls = type(self) - - return "%s.%s[%d/%d](%s)" % ( - cls.__module__, - cls.__name__, - len(self._raw), - self._raw.maxsize(), - _items_to_str(self._raw.items(), len(self._raw)), - ) - - -class LFUCache(BaseCacheImpl[KT, VT]): - """ - A thread-safe Least Frequently Used (LFU) cache implementation. - - This cache removes elements that have been accessed the least number of times, - regardless of their access time. It provides methods for inserting, retrieving, - and managing cache entries with configurable maximum size and initial capacity. 
- - Key features: - - Thread-safe cache with LFU eviction policy - - Configurable maximum size and initial capacity - - Supports initialization from dictionaries or iterables - - Provides methods for key-value management similar to dict - """ - - __slots__ = ("_raw",) - - def __init__( - self, - maxsize: int, - iterable: typing.Union[typing.Union[dict, typing.Iterable[tuple]], None] = None, - *, - capacity: int = 0, - maxmemory: int = 0, - ) -> None: - """ - Initialize a new Least Frequently Used (LFU) cache. - - Args: - maxsize (int): Maximum size of the cache. A value of zero means unlimited size. - iterable (dict or Iterable[tuple], optional): Initial data to populate the cache. - capacity (int, optional): Initial hash table capacity to minimize reallocations. Defaults to 0. - maxmemory (int, optional): Maximum memory (bytes) allowed for cached entries. Zero means unlimited. - On PyPy. In PyPy, the size of each object is assumed to be 1 if the object - does not have a `__sizeof__` method. - - The cache uses a thread-safe LFU eviction policy, removing least frequently accessed items when the cache reaches its maximum size. 
- """ - self._raw = _core.LFUCache(maxsize, capacity=capacity, maxmemory=maxmemory) - - if iterable is not None: - self.update(iterable) - - @property - def maxsize(self) -> int: - return self._raw.maxsize() - - @property - def maxmemory(self) -> int: - return self._raw.maxmemory() - - def capacity(self) -> int: - """Returns the number of elements the map can hold without reallocating.""" - return self._raw.capacity() - - def memory(self) -> int: - """Returns the total estimated memory usage of cached entries in bytes.""" - return self._raw.memory() - - def __len__(self) -> int: - return len(self._raw) - - def __sizeof__(self): # pragma: no cover - return self._raw.__sizeof__() - - def __contains__(self, key: KT) -> bool: - return key in self._raw - - def __bool__(self) -> bool: - return not self.is_empty() - - def is_empty(self) -> bool: - return self._raw.is_empty() - - def is_full(self) -> bool: - return self._raw.is_full() - - def insert(self, key: KT, value: VT) -> typing.Optional[VT]: - """ - Inserts a key-value pair into the cache, returning the previous value if the key existed. - - Equivalent to `self[key] = value`, but with additional return value semantics: - - - If the key was not previously in the cache, returns None. - - If the key was already present, updates the value and returns the old value. - The key itself is not modified. - - Args: - key: The key to insert. - value: The value to associate with the key. - - Returns: - The previous value associated with the key, or None if the key was not present. - """ - return self._raw.insert(key, value) - - def peek( - self, key: KT, default: typing.Optional[DT] = None - ) -> typing.Union[VT, DT]: # pragma: no cover - """ - Searches for a key-value in the cache and returns it (without moving the key to recently used). 
- """ - try: - return self._raw.peek(key) - except _core.CoreKeyError: - return default # type: ignore[return-value] - - def get(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Retrieves the value for a given key from the cache. - - Returns the value associated with the key if present, otherwise returns the specified default value. - Equivalent to `self[key]`, but provides a fallback default if the key is not found. - - Args: - key: The key to look up in the cache. - default: The value to return if the key is not present in the cache. Defaults to None. - - Returns: - The value associated with the key, or the default value if the key is not found. - """ - try: - return self._raw.get(key) - except _core.CoreKeyError: - return default # type: ignore[return-value] - - def pop(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Removes specified key and return the corresponding value. If the key is not found, returns the `default`. - """ - try: - return self._raw.remove(key) - except _core.CoreKeyError: - return default # type: ignore[return-value] - - def setdefault(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Inserts key with a value of default if key is not in the cache. - - Return the value for key if key is in the cache, else default. - """ - return self._raw.setdefault(key, default) - - def popitem(self) -> typing.Tuple[KT, VT]: - """ - Removes and returns the least frequently used (LFU) item from the cache. 
- """ - try: - return self._raw.popitem() - except _core.CoreKeyError: # pragma: no cover - raise KeyError() from None - - def drain(self, n: int) -> int: # pragma: no cover - """Does the `popitem()` `n` times and returns count of removed items.""" - if n <= 0: - return 0 - - for i in range(n): - try: - self._raw.popitem() - except _core.CoreKeyError: - return i - - return i - - def update(self, iterable: typing.Union[dict, typing.Iterable[tuple]]) -> None: - """Updates the cache with elements from a dictionary or an iterable object of key/value pairs.""" - if hasattr(iterable, "items"): - iterable = iterable.items() - - self._raw.update(iterable) - - def __setitem__(self, key: KT, value: VT) -> None: - self.insert(key, value) - - def __getitem__(self, key: KT) -> VT: - try: - return self._raw.get(key) - except _core.CoreKeyError: - raise KeyError(key) from None - - def __delitem__(self, key: KT) -> None: - try: - self._raw.remove(key) - except _core.CoreKeyError: - raise KeyError(key) from None - - def __eq__(self, other) -> bool: - if not isinstance(other, LFUCache): - return False # pragma: no cover - - return self._raw == other._raw - - def __ne__(self, other) -> bool: - if not isinstance(other, LFUCache): - return False # pragma: no cover - - return self._raw != other._raw - - def shrink_to_fit(self) -> None: - """Shrinks the cache to fit len(self) elements.""" - self._raw.shrink_to_fit() - - def clear(self, *, reuse: bool = False) -> None: - """ - Removes all items from cache. - - If reuse is True, will not free the memory for reusing in the future. - """ - self._raw.clear(reuse) - - def items(self) -> IteratorView[typing.Tuple[KT, VT]]: - """ - Returns an iterable object of the cache's items (key-value pairs). - - Notes: - - You should not make any changes in cache while using this iterable object. 
- """ - return IteratorView(self._raw.items(), lambda x: (x[0], x[1])) - - def items_with_frequency(self) -> IteratorView[typing.Tuple[KT, VT, int]]: - """ - Returns an iterable view - containing tuples of `(key, value, frequency)` - of the cache's items along with their access frequency. - - Notes: - - The returned iterator should not be used to modify the cache. - - Frequency represents how many times the item has been accessed. - """ - return IteratorView(self._raw.items(), lambda x: x) - - def keys(self) -> IteratorView[KT]: - """ - Returns an iterable object of the cache's keys. - - Notes: - - You should not make any changes in cache while using this iterable object. - """ - return IteratorView(self._raw.items(), lambda x: x[0]) - - def values(self) -> IteratorView[VT]: - """ - Returns an iterable object of the cache's values. - - Notes: - - You should not make any changes in cache while using this iterable object. - """ - return IteratorView(self._raw.items(), lambda x: x[1]) - - def least_frequently_used(self, n: int = 0) -> typing.Optional[KT]: - """ - Returns the key in the cache that has been accessed the least, regardless of time. - - If n is given, returns the nth least frequently used key. - - Notes: - - This method may re-sort the cache which can cause iterators to be stopped. - - Do not use this method while using iterators. 
- """ - if n < 0: - n = len(self._raw) + n - - if n < 0: - return None - - return self._raw.least_frequently_used(n) - - def copy(self) -> "LFUCache[KT, VT]": - """Returns a shallow copy of the cache""" - return self.__copy__() - - def __copy__(self) -> "LFUCache[KT, VT]": - cls = type(self) - copied = cls.__new__(cls) - copied._raw = _std_copy.copy(self._raw) - return copied - - def __deepcopy__(self, memo) -> "LFUCache[KT, VT]": - cls = type(self) - copied = cls.__new__(cls) - copied._raw = _std_copy.deepcopy(self._raw, memo) - return copied - - def __iter__(self) -> IteratorView[KT]: - return self.keys() - - def __repr__(self) -> str: - cls = type(self) - - return "%s.%s[%d/%d](%s)" % ( - cls.__module__, - cls.__name__, - len(self._raw), - self._raw.maxsize(), - # NOTE: we cannot use self._raw.items() here because iterables a tuples of (key, value, frequency) - _items_to_str(self.items(), len(self._raw)), - ) - - -class TTLCache(BaseCacheImpl[KT, VT]): - """ - A thread-safe Time-To-Live (TTL) cache implementation with configurable maximum size and expiration. - - This cache automatically removes elements that have expired based on their time-to-live setting. - Supports various operations like insertion, retrieval, and iteration. - """ - - __slots__ = ("_raw",) - - def __init__( - self, - maxsize: int, - ttl: typing.Union[float, timedelta], - iterable: typing.Union[typing.Union[dict, typing.Iterable[tuple]], None] = None, - *, - capacity: int = 0, - maxmemory: int = 0, - ) -> None: - """ - Initialize a new TTL cache instance. - - Args: - maxsize: Maximum number of elements the cache can hold. - ttl: Time-to-live for cache entries, either as seconds or a timedelta. - iterable: Optional initial items to populate the cache, can be a dict or iterable of tuples. - capacity: Optional initial capacity for the underlying cache storage. Defaults to 0. - maxmemory: Maximum memory (bytes) allowed for cached entries. Zero means unlimited. - On PyPy. 
In PyPy, the size of each object is assumed to be 1 if the object - does not have a `__sizeof__` method. - - Raises: - ValueError: If the time-to-live (ttl) is not a positive number. - """ - if isinstance(ttl, timedelta): - ttl = ttl.total_seconds() - - if ttl <= 0: - raise ValueError("ttl must be a positive number and non-zero") - - self._raw = _core.TTLCache(maxsize, ttl, capacity=capacity, maxmemory=maxmemory) - - if iterable is not None: - self.update(iterable) - - @property - def maxsize(self) -> int: - return self._raw.maxsize() - - @property - def maxmemory(self) -> int: - return self._raw.maxmemory() - - @property - def ttl(self) -> float: - return self._raw.ttl() - - def capacity(self) -> int: - """Returns the number of elements the map can hold without reallocating.""" - return self._raw.capacity() - - def memory(self) -> int: - """Returns the total estimated memory usage of cached entries in bytes.""" - return self._raw.memory() - - def __len__(self) -> int: - return len(self._raw) - - def __sizeof__(self): # pragma: no cover - return self._raw.__sizeof__() - - def __contains__(self, key: KT) -> bool: - return key in self._raw - - def __bool__(self) -> bool: - return not self.is_empty() - - def is_empty(self) -> bool: - return self._raw.is_empty() - - def is_full(self) -> bool: - return self._raw.is_full() - - def insert(self, key: KT, value: VT) -> typing.Optional[VT]: - """ - Inserts a key-value pair into the cache, returning the previous value if the key existed. - - Equivalent to `self[key] = value`, but with additional return value semantics: - - - If the key was not previously in the cache, returns None. - - If the key was already present, updates the value and returns the old value. - The key itself is not modified. - - Args: - key: The key to insert. - value: The value to associate with the key. - - Returns: - The previous value associated with the key, or None if the key was not present. 
- """ - return self._raw.insert(key, value) - - def get(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Retrieves the value for a given key from the cache. - - Returns the value associated with the key if present, otherwise returns the specified default value. - Equivalent to `self[key]`, but provides a fallback default if the key is not found. - - Args: - key: The key to look up in the cache. - default: The value to return if the key is not present in the cache. Defaults to None. - - Returns: - The value associated with the key, or the default value if the key is not found. - """ - try: - return self._raw.get(key).value() - except _core.CoreKeyError: - return default # type: ignore[return-value] - - def get_with_expire( - self, key: KT, default: typing.Optional[DT] = None - ) -> typing.Tuple[typing.Union[VT, DT], float]: - """ - Retrieves the value and expiration duration for a given key from the cache. - - Returns a tuple containing the value associated with the key and its duration. - If the key is not found, returns the default value and 0.0 duration. - - Args: - key: The key to look up in the cache. - default: The value to return if the key is not present in the cache. Defaults to None. - - Returns: - A tuple of (value, duration), where value is the cached value or default, - and duration is the time-to-live for the key (or 0.0 if not found). - """ - try: - pair = self._raw.get(key) - except _core.CoreKeyError: - return default, 0.0 # type: ignore[return-value] - else: - return (pair.value(), pair.duration()) - - def pop(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Removes specified key and return the corresponding value. If the key is not found, returns the `default`. 
- """ - try: - return self._raw.remove(key).value() - except _core.CoreKeyError: - return default # type: ignore[return-value] - - def pop_with_expire( - self, key: KT, default: typing.Optional[DT] = None - ) -> typing.Tuple[typing.Union[VT, DT], float]: - """ - Removes the specified key from the cache and returns its value and expiration duration. - - If the key is not found, returns the default value and 0.0 duration. - - Args: - key: The key to remove from the cache. - default: The value to return if the key is not present in the cache. Defaults to None. - - Returns: - A tuple of (value, duration), where value is the cached value or default, - and duration is the time-to-live for the key (or 0.0 if not found). - """ - try: - pair = self._raw.remove(key) - except _core.CoreKeyError: - return default, 0.0 # type: ignore[return-value] - else: - return (pair.value(), pair.duration()) - - def setdefault(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Inserts key with a value of default if key is not in the cache. - - Return the value for key if key is in the cache, else default. - """ - return self._raw.setdefault(key, default) - - def popitem(self) -> typing.Tuple[KT, VT]: - """Removes the element that has been in the cache the longest.""" - try: - val = self._raw.popitem() - except _core.CoreKeyError: - raise KeyError() from None - else: - return val.pack2() - - def popitem_with_expire(self) -> typing.Tuple[KT, VT, float]: - """ - Removes and returns the element that has been in the cache the longest, along with its key and expiration duration. - - If the cache is empty, raises a KeyError. 
- - Returns: - A tuple of (key, value, duration), where: - - key is the key of the removed item - - value is the value of the removed item - - duration is the time-to-live for the removed item - """ - try: - val = self._raw.popitem() - except _core.CoreKeyError: - raise KeyError() from None - else: - return val.pack3() - - def drain(self, n: int) -> int: # pragma: no cover - """Does the `popitem()` `n` times and returns count of removed items.""" - if n <= 0: - return 0 - - for i in range(n): - try: - self._raw.popitem() - except _core.CoreKeyError: - return i - - return i - - def update(self, iterable: typing.Union[dict, typing.Iterable[tuple]]) -> None: - """Updates the cache with elements from a dictionary or an iterable object of key/value pairs.""" - if hasattr(iterable, "items"): - iterable = iterable.items() - - self._raw.update(iterable) - - def __setitem__(self, key: KT, value: VT) -> None: - self.insert(key, value) - - def __getitem__(self, key: KT) -> VT: - try: - return self._raw.get(key).value() - except _core.CoreKeyError: - raise KeyError(key) from None - - def __delitem__(self, key: KT) -> None: - try: - self._raw.remove(key) - except _core.CoreKeyError: - raise KeyError(key) from None - - def __eq__(self, other) -> bool: - if not isinstance(other, TTLCache): - return False # pragma: no cover - - return self._raw == other._raw - - def __ne__(self, other) -> bool: - if not isinstance(other, TTLCache): - return False # pragma: no cover - - return self._raw != other._raw - - def shrink_to_fit(self) -> None: - """Shrinks the cache to fit len(self) elements.""" - self._raw.shrink_to_fit() - - def clear(self, *, reuse: bool = False) -> None: - """ - Removes all items from cache. - - If reuse is True, will not free the memory for reusing in the future. 
- """ - self._raw.clear(reuse) - - def items_with_expire(self) -> IteratorView[typing.Tuple[KT, VT, float]]: - """ - Returns an iterable object of the cache's items (key-value pairs along with their expiration duration). - - Notes: - - You should not make any changes in cache while using this iterable object. - """ - return IteratorView(self._raw.items(), lambda x: x.pack3()) - - def items(self) -> IteratorView[typing.Tuple[KT, VT]]: - """ - Returns an iterable object of the cache's items (key-value pairs). - - Notes: - - You should not make any changes in cache while using this iterable object. - """ - return IteratorView(self._raw.items(), lambda x: x.pack2()) - - def keys(self) -> IteratorView[KT]: - """ - Returns an iterable object of the cache's keys. - - Notes: - - You should not make any changes in cache while using this iterable object. - """ - return IteratorView(self._raw.items(), lambda x: x.key()) - - def values(self) -> IteratorView[VT]: - """ - Returns an iterable object of the cache's values. - - Notes: - - You should not make any changes in cache while using this iterable object. - """ - return IteratorView(self._raw.items(), lambda x: x.value()) - - def first(self, n: int = 0) -> typing.Optional[KT]: # pragma: no cover - """ - Returns the first key in cache; this is the one which will be removed by `popitem()` (if n == 0). - - By using `n` parameter, you can browse order index by index. - """ - if n < 0: - n = len(self._raw) + n - - if n < 0: - return None - - return self._raw.get_index(n) - - def last(self) -> typing.Optional[KT]: - """ - Returns the last key in cache. Equals to `self.first(-1)`. - """ - return self._raw.get_index(len(self._raw) - 1) - - def expire(self) -> None: # pragma: no cover - """ - Manually removes expired key-value pairs from memory and releases their memory. - - Notes: - - This operation is typically automatic and does not require manual invocation. 
- """ - self._raw.expire() - - def copy(self) -> "TTLCache[KT, VT]": - """Returns a shallow copy of the cache""" - return self.__copy__() - - def __copy__(self) -> "TTLCache[KT, VT]": - cls = type(self) - copied = cls.__new__(cls) - copied._raw = _std_copy.copy(self._raw) - return copied - - def __deepcopy__(self, memo) -> "TTLCache[KT, VT]": - cls = type(self) - copied = cls.__new__(cls) - copied._raw = _std_copy.deepcopy(self._raw, memo) - return copied - - def __iter__(self) -> IteratorView[KT]: - return self.keys() - - def __repr__(self) -> str: - cls = type(self) - - return "%s.%s[%d/%d, ttl=%f](%s)" % ( - cls.__module__, - cls.__name__, - len(self._raw), - self._raw.maxsize(), - self._raw.ttl(), - _items_to_str(self.items(), len(self._raw)), - ) - - -class VTTLCache(BaseCacheImpl[KT, VT]): - """ - A thread-safe, time-to-live (TTL) cache implementation with per-key expiration policy. - - This cache allows storing key-value pairs with optional expiration times. When an item expires, - it is automatically removed from the cache. The cache supports a maximum size and provides - various methods for inserting, retrieving, and managing cached items. - - Key features: - - Per-key time-to-live (TTL) support - - Configurable maximum cache size - - Thread-safe operations - - Automatic expiration of items - - Supports dictionary-like operations such as get, insert, update, and iteration. - """ - - __slots__ = ("_raw",) - - def __init__( - self, - maxsize: int, - iterable: typing.Union[typing.Union[dict, typing.Iterable[tuple]], None] = None, - ttl: typing.Union[float, timedelta, datetime, None] = None, # This is not a global TTL! - *, - capacity: int = 0, - maxmemory: int = 0, - ) -> None: - """ - Initialize a new VTTLCache instance. - - Args: - maxsize (int): Maximum size of the cache. Zero indicates unlimited size. - iterable (dict or Iterable[tuple], optional): Initial data to populate the cache. 
- ttl (float or timedelta or datetime, optional): Time-to-live duration for `iterable` items. - capacity (int, optional): Preallocated capacity for the cache to minimize reallocations. - maxmemory (int, optional): Maximum memory (bytes) allowed for cached entries. Zero means unlimited. - On PyPy. In PyPy, the size of each object is assumed to be 1 if the object - does not have a `__sizeof__` method. - - Raises: - ValueError: If provided TTL is zero or negative. - """ - self._raw = _core.VTTLCache(maxsize, capacity=capacity, maxmemory=maxmemory) - - if iterable is not None: - self.update(iterable, ttl) - - @property - def maxsize(self) -> int: - return self._raw.maxsize() - - @property - def maxmemory(self) -> int: - return self._raw.maxmemory() - - def capacity(self) -> int: - """Returns the number of elements the map can hold without reallocating.""" - return self._raw.capacity() - - def memory(self) -> int: - """Returns the total estimated memory usage of cached entries in bytes.""" - return self._raw.memory() - - def __len__(self) -> int: - return len(self._raw) - - def __sizeof__(self): # pragma: no cover - return self._raw.__sizeof__() - - def __contains__(self, key: KT) -> bool: - return key in self._raw - - def __bool__(self) -> bool: - return not self.is_empty() - - def is_empty(self) -> bool: - return self._raw.is_empty() - - def is_full(self) -> bool: - return self._raw.is_full() - - def insert( - self, - key: KT, - value: VT, - ttl: typing.Union[float, timedelta, datetime, None] = None, - ) -> typing.Optional[VT]: - """ - Insert a key-value pair into the cache with an optional time-to-live (TTL). - Returns the previous value associated with the key, if it existed. - - Args: - key (KT): The key to insert. - value (VT): The value to associate with the key. - ttl (float or timedelta or datetime, optional): Time-to-live duration for the item. - If a timedelta or datetime is provided, it will be converted to seconds. 
- - Raises: - ValueError: If the provided TTL is zero or negative. - """ - if ttl is not None: # pragma: no cover - if isinstance(ttl, timedelta): - ttl = ttl.total_seconds() - - elif isinstance(ttl, datetime): - ttl = (ttl - datetime.now()).total_seconds() - - if ttl <= 0: - raise ValueError("ttl must be positive and non-zero") - - return self._raw.insert(key, value, ttl) - - def get(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Retrieves the value for a given key from the cache. - - Returns the value associated with the key if present, otherwise returns the specified default value. - Equivalent to `self[key]`, but provides a fallback default if the key is not found. - - Args: - key: The key to look up in the cache. - default: The value to return if the key is not present in the cache. Defaults to None. - - Returns: - The value associated with the key, or the default value if the key is not found. - """ - try: - return self._raw.get(key).value() - except _core.CoreKeyError: - return default # type: ignore[return-value] - - def get_with_expire( - self, key: KT, default: typing.Optional[DT] = None - ) -> typing.Tuple[typing.Union[VT, DT], float]: - """ - Retrieves the value and expiration duration for a given key from the cache. - - Returns a tuple containing the value associated with the key and its duration. - If the key is not found, returns the default value and 0.0 duration. - - Args: - key: The key to look up in the cache. - default: The value to return if the key is not present in the cache. Defaults to None. - - Returns: - A tuple of (value, duration), where value is the cached value or default, - and duration is the time-to-live for the key (or 0.0 if not found). 
- """ - try: - pair = self._raw.get(key) - except _core.CoreKeyError: - return default, 0.0 # type: ignore[return-value] - else: - return (pair.value(), pair.duration()) - - def pop(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Removes specified key and return the corresponding value. If the key is not found, returns the `default`. - """ - try: - return self._raw.remove(key).value() - except _core.CoreKeyError: - return default # type: ignore[return-value] - - def pop_with_expire( - self, key: KT, default: typing.Optional[DT] = None - ) -> typing.Tuple[typing.Union[VT, DT], float]: - """ - Removes the specified key from the cache and returns its value and expiration duration. - - If the key is not found, returns the default value and 0.0 duration. - - Args: - key: The key to remove from the cache. - default: The value to return if the key is not present in the cache. Defaults to None. - - Returns: - A tuple of (value, duration), where value is the cached value or default, - and duration is the time-to-live for the key (or 0.0 if not found). - """ - try: - pair = self._raw.remove(key) - except _core.CoreKeyError: - return default, 0.0 # type: ignore[return-value] - else: - return (pair.value(), pair.duration()) - - def setdefault( - self, - key: KT, - default: typing.Optional[DT] = None, - ttl: typing.Union[float, timedelta, datetime, None] = None, - ) -> typing.Union[VT, DT]: - """ - Inserts a key-value pair into the cache with an optional time-to-live (TTL). - - If the key is not in the cache, it will be inserted with the default value. - If the key already exists, its current value is returned. - - Args: - key: The key to insert or retrieve from the cache. - default: The value to insert if the key is not present. Defaults to None. - ttl: Optional time-to-live for the key. Can be a float (seconds), timedelta, or datetime. - If not specified, the key will not expire. 
- - Returns: - The value associated with the key, either existing or the default value. - - Raises: - ValueError: If the provided TTL is not a positive value. - """ - if ttl is not None: # pragma: no cover - if isinstance(ttl, timedelta): - ttl = ttl.total_seconds() - - elif isinstance(ttl, datetime): - ttl = (ttl - datetime.now()).total_seconds() - - if ttl <= 0: - raise ValueError("ttl must be positive and non-zero") - - return self._raw.setdefault(key, default, ttl) - - def popitem(self) -> typing.Tuple[KT, VT]: - """ - Removes and returns the key-value pair that is closest to expiration. - - Returns: - A tuple containing the key and value of the removed item. - - Raises: - KeyError: If the cache is empty. - """ - try: - val = self._raw.popitem() - except _core.CoreKeyError: # pragma: no cover - raise KeyError() from None - else: - return val.pack2() - - def popitem_with_expire(self) -> typing.Tuple[KT, VT, float]: - """ - Removes and returns the key-value pair that is closest to expiration, along with its expiration duration. - - Returns: - A tuple containing the key, value, and expiration duration of the removed item. - - Raises: - KeyError: If the cache is empty. 
- """ - try: - val = self._raw.popitem() - except _core.CoreKeyError: - raise KeyError() from None - else: - return val.pack3() - - def drain(self, n: int) -> int: # pragma: no cover - """Does the `popitem()` `n` times and returns count of removed items.""" - if n <= 0: - return 0 - - for i in range(n): - try: - self._raw.popitem() - except _core.CoreKeyError: - return i - - return i - - def update( - self, - iterable: typing.Union[dict, typing.Iterable[tuple]], - ttl: typing.Union[float, timedelta, datetime, None] = None, - ) -> None: - """Updates the cache with elements from a dictionary or an iterable object of key/value pairs.""" - if hasattr(iterable, "items"): - iterable = iterable.items() - - if ttl is not None: # pragma: no cover - if isinstance(ttl, timedelta): - ttl = ttl.total_seconds() - - elif isinstance(ttl, datetime): - ttl = (ttl - datetime.now()).total_seconds() - - if ttl <= 0: - raise ValueError("ttl must be positive and non-zero") - - self._raw.update(iterable, ttl) - - def __setitem__(self, key: KT, value: VT) -> None: - self.insert(key, value, None) - - def __getitem__(self, key: KT) -> VT: - try: - return self._raw.get(key).value() - except _core.CoreKeyError: - raise KeyError(key) from None - - def __delitem__(self, key: KT) -> None: - try: - self._raw.remove(key) - except _core.CoreKeyError: - raise KeyError(key) from None - - def __eq__(self, other) -> bool: - if not isinstance(other, VTTLCache): - return False # pragma: no cover - - return self._raw == other._raw - - def __ne__(self, other) -> bool: - if not isinstance(other, VTTLCache): - return False # pragma: no cover - - return self._raw != other._raw - - def shrink_to_fit(self) -> None: - """Shrinks the cache to fit len(self) elements.""" - self._raw.shrink_to_fit() - - def clear(self, *, reuse: bool = False) -> None: - """ - Removes all items from cache. - - If reuse is True, will not free the memory for reusing in the future. 
- """ - self._raw.clear(reuse) - - def items_with_expire(self) -> IteratorView[typing.Tuple[KT, VT, float]]: - """ - Returns an iterable object of the cache's items (key-value pairs along with their expiration duration). - - Notes: - - You should not make any changes in cache while using this iterable object. - """ - return IteratorView(self._raw.items(), lambda x: x.pack3()) - - def items(self) -> IteratorView[typing.Tuple[KT, VT]]: - """ - Returns an iterable object of the cache's items (key-value pairs). - - Notes: - - You should not make any changes in cache while using this iterable object. - """ - return IteratorView(self._raw.items(), lambda x: x.pack2()) - - def keys(self) -> IteratorView[KT]: - """ - Returns an iterable object of the cache's keys. - - Notes: - - You should not make any changes in cache while using this iterable object. - """ - return IteratorView(self._raw.items(), lambda x: x.key()) - - def values(self) -> IteratorView[VT]: - """ - Returns an iterable object of the cache's values. - - Notes: - - You should not make any changes in cache while using this iterable object. - """ - return IteratorView(self._raw.items(), lambda x: x.value()) - - def expire(self) -> None: # pragma: no cover - """ - Manually removes expired key-value pairs from memory and releases their memory. - - Notes: - - This operation is typically automatic and does not require manual invocation. 
- """ - self._raw.expire() - - def copy(self) -> "VTTLCache[KT, VT]": - """Returns a shallow copy of the cache""" - return self.__copy__() - - def __copy__(self) -> "VTTLCache[KT, VT]": - cls = type(self) - copied = cls.__new__(cls) - copied._raw = _std_copy.copy(self._raw) - return copied - - def __deepcopy__(self, memo) -> "VTTLCache[KT, VT]": - cls = type(self) - copied = cls.__new__(cls) - copied._raw = _std_copy.deepcopy(self._raw, memo) - return copied - - def __iter__(self) -> IteratorView[KT]: - return self.keys() - - def __repr__(self) -> str: - cls = type(self) - - return "%s.%s[%d/%d](%s)" % ( - cls.__module__, - cls.__name__, - len(self._raw), - self._raw.maxsize(), - _items_to_str(self.items(), len(self._raw)), - ) diff --git a/python/cachebox/_core.pyi b/python/cachebox/_core.pyi deleted file mode 100644 index b246b0e..0000000 --- a/python/cachebox/_core.pyi +++ /dev/null @@ -1,83 +0,0 @@ -import typing - -__version__: str -__author__: str - -class CoreKeyError(Exception): - """ - An exception when a key is not found in a cache. - This exception is internal to the library core and won't affect you. - """ - - ... - -KT = typing.TypeVar("KT") -VT = typing.TypeVar("VT") -DT = typing.TypeVar("DT") - -class BaseCacheImpl(typing.Generic[KT, VT]): - """ - Base implementation for cache classes in the cachebox library. - - This abstract base class defines the generic structure for cache implementations, - supporting different key and value types through generic type parameters. - Serves as a foundation for specific cache variants like Cache and FIFOCache. - """ - - def __init__( - self, - maxsize: int, - iterable: typing.Union[typing.Iterable[typing.Tuple[KT, VT]], typing.Dict[KT, VT]] = ..., - *, - capacity: int = ..., - maxmemory: int = ..., - ) -> None: ... - @staticmethod - def __class_getitem__(*args: typing.Any) -> None: ... - @property - def maxsize(self) -> int: ... - @property - def maxmemory(self) -> int: ... - def __len__(self) -> int: ... 
- def __sizeof__(self) -> int: ... - def __bool__(self) -> bool: ... - def __contains__(self, key: KT) -> bool: ... - def __setitem__(self, key: KT, value: VT) -> None: ... - def __getitem__(self, key: KT) -> VT: ... - def __delitem__(self, key: KT) -> None: ... - def __str__(self) -> str: ... - def __iter__(self) -> typing.Iterator[KT]: ... - def __eq__(self, other: typing.Any) -> bool: ... - def __ne__(self, other: typing.Any) -> bool: ... - def capacity(self) -> int: ... - def memory(self) -> int: ... - def is_full(self) -> bool: ... - def is_empty(self) -> bool: ... - def insert( - self, key: KT, value: VT, *args: typing.Any, **kwargs: typing.Any - ) -> typing.Optional[VT]: ... - def get(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: ... - def pop(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: ... - def setdefault( - self, - key: KT, - default: typing.Optional[DT] = None, - *args: typing.Any, - **kwargs: typing.Any, - ) -> typing.Optional[VT | DT]: ... - def popitem(self) -> typing.Tuple[KT, VT]: ... - def drain(self, n: int) -> int: ... - def clear(self, *, reuse: bool = False) -> None: ... - def shrink_to_fit(self) -> None: ... - def update( - self, - iterable: typing.Union[typing.Iterable[typing.Tuple[KT, VT]], typing.Dict[KT, VT]], - *args: typing.Any, - **kwargs: typing.Any, - ) -> None: ... - def keys(self) -> typing.Iterable[KT]: ... - def values(self) -> typing.Iterable[VT]: ... - def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: ... - def __copy__(self) -> "BaseCacheImpl[KT, VT]": ... - def __deepcopy__(self, memo: typing.Dict[str, object]) -> "BaseCacheImpl[KT, VT]": ... - def copy(self) -> "BaseCacheImpl[KT, VT]": ... 
diff --git a/python/cachebox/utils.py b/python/cachebox/utils.py deleted file mode 100644 index b520d28..0000000 --- a/python/cachebox/utils.py +++ /dev/null @@ -1,599 +0,0 @@ -import _thread -import asyncio -import functools -import inspect -import typing -from collections import defaultdict, namedtuple - -from ._cachebox import BaseCacheImpl, FIFOCache - -KT = typing.TypeVar("KT") -VT = typing.TypeVar("VT") -DT = typing.TypeVar("DT") -FT = typing.TypeVar("FT", bound=typing.Callable[..., typing.Any]) - - -class Frozen(BaseCacheImpl[KT, VT]): # pragma: no cover - """ - A wrapper class that prevents modifications to an underlying cache implementation. - - This class provides a read-only view of a cache, optionally allowing silent - suppression of modification attempts instead of raising exceptions. - """ - - __slots__ = ("__cache", "ignore") - - def __init__(self, cls: BaseCacheImpl[KT, VT], ignore: bool = False) -> None: - """ - Initialize a frozen cache wrapper. - - :param cls: The underlying cache implementation to be frozen - :type cls: BaseCacheImpl[KT, VT] - :param ignore: If True, silently ignores modification attempts; if False, raises TypeError when modification is attempted - :type ignore: bool, optional - """ - assert isinstance(cls, BaseCacheImpl) - assert type(cls) is not Frozen - - self.__cache = cls - self.ignore = ignore - - @property - def cache(self) -> BaseCacheImpl[KT, VT]: - return self.__cache - - @property - def maxsize(self) -> int: - return self.__cache.maxsize - - @property - def maxmemory(self) -> int: - return self.__cache.maxmemory - - def __len__(self) -> int: - return len(self.__cache) - - def __sizeof__(self) -> int: - return self.__cache.__sizeof__() - - def __bool__(self) -> bool: - return bool(self.__cache) - - def __contains__(self, key: KT) -> bool: - return key in self.__cache - - def __setitem__(self, key: KT, value: VT) -> None: - if self.ignore: - return - - raise TypeError("This cache is frozen.") - - def __getitem__(self, 
key: KT) -> VT: - return self.__cache[key] - - def __delitem__(self, key: KT) -> None: - if self.ignore: - return None - - raise TypeError("This cache is frozen.") - - def __repr__(self) -> str: - return f"" - - def __iter__(self) -> typing.Iterator[KT]: - return iter(self.__cache) - - def __richcmp__(self, other: typing.Any, op: int) -> bool: - return self.__cache.__richcmp__(other, op) - - def capacity(self) -> int: - return self.__cache.capacity() - - def memory(self) -> int: - return self.__cache.memory() - - def is_full(self) -> bool: - return self.__cache.is_full() - - def is_empty(self) -> bool: - return self.__cache.is_empty() - - def insert(self, key: KT, value: VT, *args, **kwargs) -> typing.Optional[VT]: - if self.ignore: - return None - - raise TypeError("This cache is frozen.") - - def get(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - return self.__cache.get(key, default) - - def pop(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - if self.ignore: - return None # type: ignore[return-value] - - raise TypeError("This cache is frozen.") - - def setdefault( - self, key: KT, default: typing.Optional[DT] = None, *args, **kwargs - ) -> typing.Optional[typing.Union[VT, DT]]: - if self.ignore: - return None - - raise TypeError("This cache is frozen.") - - def popitem(self) -> typing.Tuple[KT, VT]: - if self.ignore: - return # type: ignore - - raise TypeError("This cache is frozen.") - - def drain(self, n: int) -> int: - if self.ignore: - return # type: ignore - - raise TypeError("This cache is frozen.") - - def clear(self, *, reuse: bool = False) -> None: - if self.ignore: - return - - raise TypeError("This cache is frozen.") - - def shrink_to_fit(self) -> None: - if self.ignore: - return - - raise TypeError("This cache is frozen.") - - def update( - self, - iterable: typing.Union[typing.Iterable[typing.Tuple[KT, VT]], typing.Dict[KT, VT]], - *args, - **kwargs, - ) -> None: - if self.ignore: - 
return - - raise TypeError("This cache is frozen.") - - def keys(self) -> typing.Iterable[KT]: - return self.__cache.keys() - - def values(self) -> typing.Iterable[VT]: - return self.__cache.values() - - def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: - return self.__cache.items() - - -class _LockWithCounter: - """ - A lock with a counter to track the number of waiters. - - This class provides a lock mechanism that supports both synchronous and asynchronous contexts, - with the ability to track the number of threads or coroutines waiting to acquire the lock. - """ - - __slots__ = ("lock", "waiters") - - def __init__(self, is_async: bool = False): - self.lock = _thread.allocate_lock() if not is_async else asyncio.Lock() - self.waiters = 0 - - async def __aenter__(self) -> None: - self.waiters += 1 - await self.lock.acquire() # type: ignore[misc] - - async def __aexit__(self, *args, **kwds) -> None: - self.waiters -= 1 - self.lock.release() - - def __enter__(self) -> None: - self.waiters += 1 - self.lock.acquire() - - def __exit__(self, *args, **kwds) -> None: - self.waiters -= 1 - self.lock.release() - - -def _copy_if_need(obj: VT, tocopy=(dict, list, set), level: int = 1) -> VT: - from copy import copy - - if level == 0: - return obj - - if level == 2: - return copy(obj) - - return copy(obj) if (type(obj) in tocopy) else obj - - -def make_key(args: tuple, kwds: dict, fasttype=(int, str)): - """ - Create a hashable key from function arguments for caching purposes. - - Args: - args (tuple): Positional arguments to be used in key generation. - kwds (dict): Keyword arguments to be used in key generation. - fasttype (tuple, optional): Types that can be directly used as keys. Defaults to (int, str). - - Returns: - A hashable key representing the function arguments, optimized for simple single-argument cases. 
- """ - key = args - if kwds: - key += (object,) - for item in kwds.items(): - key += item - - if fasttype and len(key) == 1 and type(key[0]) in fasttype: - return key[0] - - return key - - -def make_hash_key(args: tuple, kwds: dict): - """ - Create a hashable hash key from function arguments for caching purposes. - - Args: - args (tuple): Positional arguments to be used in key generation. - kwds (dict): Keyword arguments to be used in key generation. - - Returns: - int: A hash value representing the function arguments. - """ - return hash(make_key(args, kwds)) - - -def make_typed_key(args: tuple, kwds: dict): - """ - Create a hashable key from function arguments that includes type information. - - Args: - args (tuple): Positional arguments to be used in key generation. - kwds (dict): Keyword arguments to be used in key generation. - - Returns: - A hashable key representing the function arguments, including the types of the arguments. - """ - key = make_key(args, kwds, fasttype=()) - - key += tuple(type(v) for v in args) - if kwds: - key += tuple(type(v) for v in kwds.values()) - - return key - - -CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "length", "memory"]) -EVENT_MISS = 1 -EVENT_HIT = 2 - - -def _cached_wrapper( - func, - cache: typing.Union[BaseCacheImpl, typing.Callable], - key_maker: typing.Callable[[tuple, dict], typing.Hashable], - clear_reuse: bool, - callback: typing.Optional[typing.Callable[[int, typing.Any, typing.Any], typing.Any]], - copy_level: int, - is_method: bool, -): - is_method = cache_is_function = inspect.isfunction(cache) - _key_maker = (lambda args, kwds: key_maker(args[1:], kwds)) if is_method else key_maker - - hits = 0 - misses = 0 - locks: defaultdict[typing.Hashable, _LockWithCounter] = defaultdict(_LockWithCounter) - exceptions: typing.Dict[typing.Hashable, BaseException] = {} - - def _wrapped(*args, **kwds): - nonlocal hits, misses, locks, exceptions - - if kwds.pop("cachebox__ignore", False): - return 
func(*args, **kwds) - - _cache = cache(args[0]) if cache_is_function else cache - key = _key_maker(args, kwds) - - # try to get result from cache - try: - result = _cache[key] - except KeyError: - pass - else: - # A NOTE FOR ME: we don't want to catch KeyError exceptions from `callback` - # so don't wrap it with try except - hits += 1 - - if callback is not None: - callback(EVENT_HIT, key, result) - - return _copy_if_need(result, level=copy_level) - - with locks[key]: - if exceptions.get(key, None) is not None: - cached_error = exceptions[key] if locks[key].waiters > 1 else exceptions.pop(key) - raise cached_error - - try: - result = _cache[key] - hits += 1 - event = EVENT_HIT - except KeyError: - try: - result = func(*args, **kwds) - except Exception as e: - if locks[key].waiters > 1: - exceptions[key] = e - - raise e - - else: - _cache[key] = result - misses += 1 - event = EVENT_MISS - - if callback is not None: - callback(event, key, result) - - return _copy_if_need(result, level=copy_level) - - if not cache_is_function: - _wrapped.cache = cache - _wrapped.cache_info = lambda: CacheInfo( - hits, misses, cache.maxsize, len(cache), cache.memory() - ) - - _wrapped.callback = callback - - if not cache_is_function: - - def cache_clear() -> None: - nonlocal misses, hits, locks, exceptions - cache.clear(reuse=clear_reuse) - misses = 0 - hits = 0 - locks.clear() - exceptions.clear() - - _wrapped.cache_clear = cache_clear - - return _wrapped - - -def _async_cached_wrapper( - func, - cache: typing.Union[BaseCacheImpl, typing.Callable], - key_maker: typing.Callable[[tuple, dict], typing.Hashable], - clear_reuse: bool, - callback: typing.Optional[typing.Callable[[int, typing.Any, typing.Any], typing.Any]], - copy_level: int, - is_method: bool, -): - is_method = cache_is_function = inspect.isfunction(cache) - _key_maker = (lambda args, kwds: key_maker(args[1:], kwds)) if is_method else key_maker - - hits = 0 - misses = 0 - locks: defaultdict[typing.Hashable, 
_LockWithCounter] = defaultdict( - lambda: _LockWithCounter(True) - ) - exceptions: typing.Dict[typing.Hashable, BaseException] = {} - - async def _wrapped(*args, **kwds): - nonlocal hits, misses, locks, exceptions - - if kwds.pop("cachebox__ignore", False): - return await func(*args, **kwds) - - _cache = cache(args[0]) if cache_is_function else cache - key = _key_maker(args, kwds) - - # try to get result from cache - try: - result = _cache[key] - except KeyError: - pass - else: - # A NOTE FOR ME: we don't want to catch KeyError exceptions from `callback` - # so don't wrap it with try except - hits += 1 - - if callback is not None: - awaitable = callback(EVENT_HIT, key, result) - if inspect.isawaitable(awaitable): - await awaitable - - return _copy_if_need(result, level=copy_level) - - async with locks[key]: - if exceptions.get(key, None) is not None: - cached_error = exceptions[key] if locks[key].waiters > 1 else exceptions.pop(key) - raise cached_error - - try: - result = _cache[key] - hits += 1 - event = EVENT_HIT - except KeyError: - try: - result = await func(*args, **kwds) - except Exception as e: - if locks[key].waiters > 1: - exceptions[key] = e - - raise e - - else: - _cache[key] = result - misses += 1 - event = EVENT_MISS - - if callback is not None: - awaitable = callback(event, key, result) - if inspect.isawaitable(awaitable): - await awaitable - - return _copy_if_need(result, level=copy_level) - - if not cache_is_function: - _wrapped.cache = cache - _wrapped.cache_info = lambda: CacheInfo( - hits, misses, cache.maxsize, len(cache), cache.memory() - ) - - _wrapped.callback = callback - - if not cache_is_function: - - def cache_clear() -> None: - nonlocal misses, hits, locks, exceptions - cache.clear(reuse=clear_reuse) - misses = 0 - hits = 0 - locks.clear() - exceptions.clear() - - _wrapped.cache_clear = cache_clear - - return _wrapped - - -def cached( - cache: typing.Union[BaseCacheImpl, dict, typing.Callable[..., BaseCacheImpl], None], - key_maker: 
typing.Callable[[tuple, dict], typing.Hashable] = make_key, - clear_reuse: bool = False, - callback: typing.Optional[typing.Callable[[int, typing.Any, typing.Any], typing.Any]] = None, - copy_level: int = 1, -) -> typing.Callable[[FT], FT]: - """ - Decorator to create a memoized cache for function results. - - Wraps a function to automatically cache and retrieve its results based on input parameters. - - Args: - cache (BaseCacheImpl, dict, callable): Cache implementation to store results. Defaults to FIFOCache. - Can be a function that got `self` and should return cache. - key_maker (Callable, optional): Function to generate cache keys from function arguments. Defaults to make_key. - clear_reuse (bool, optional): Whether to reuse cache during clearing. Defaults to False. - callback (Callable, optional): Function called on cache hit/miss events. Defaults to None. - copy_level (int, optional): Level of result copying. Defaults to 1. - - Returns: - Callable: Decorated function with caching capabilities. 
- - Example for functions:: - - @cachebox.cached(cachebox.LRUCache(128)) - def sum_as_string(a, b): - return str(a+b) - - assert sum_as_string(1, 2) == "3" - - assert len(sum_as_string.cache) == 1 - sum_as_string.cache_clear() - assert len(sum_as_string.cache) == 0 - - Example for methods:: - - class A: - def __init__(self, num): - self.num = num - self._cache = cachebox.FIFOCache(0) - - @cachebox.cached(lambda self: self._cache) - def method(self, n): - return self.num * n - - instance = A(10) - assert A.method(2) == 20 - """ - if cache is None: - cache = FIFOCache(0) - - if type(cache) is dict: - cache = FIFOCache(0, cache) - - if not isinstance(cache, BaseCacheImpl) and not inspect.isfunction(cache): - raise TypeError("we expected cachebox caches or function, got %r" % (cache,)) - - def decorator(func: FT) -> FT: - if inspect.iscoroutinefunction(func): - wrapper = _async_cached_wrapper( - func, cache, key_maker, clear_reuse, callback, copy_level, False - ) - else: - wrapper = _cached_wrapper( - func, cache, key_maker, clear_reuse, callback, copy_level, False - ) - - return functools.update_wrapper(wrapper, func) # type: ignore[return-value] - - return decorator - - -def cachedmethod( - cache: typing.Union[BaseCacheImpl, dict, None], - key_maker: typing.Callable[[tuple, dict], typing.Hashable] = make_key, - clear_reuse: bool = False, - callback: typing.Optional[typing.Callable[[int, typing.Any, typing.Any], typing.Any]] = None, - copy_level: int = 1, -) -> typing.Callable[[FT], FT]: - """ - **This function is deperecated due to issue [#35](https://github.com/awolverp/cachebox/issues/35)**. - Use `cached` method instead. - - Decorator to create a method-specific memoized cache for function results. - - Similar to `cached()`, but ignores `self` parameter when generating cache keys. - - Args: - cache (BaseCacheImpl, dict, optional): Cache implementation to store results. Defaults to FIFOCache. 
- key_maker (Callable, optional): Function to generate cache keys from function arguments. Defaults to make_key. - clear_reuse (bool, optional): Whether to reuse cache during clearing. Defaults to False. - callback (Callable, optional): Function called on cache hit/miss events. Defaults to None. - copy_level (int, optional): Level of result copying. Defaults to 1. - - Returns: - Callable: Decorated method with method-specific caching capabilities. - """ - import warnings - - warnings.warn( - "cachedmethod is deprecated, use cached instead. see issue https://github.com/awolverp/cachebox/issues/35", - DeprecationWarning, - stacklevel=2, - ) - - if cache is None: - cache = FIFOCache(0) - - if type(cache) is dict: - cache = FIFOCache(0, cache) - - if not isinstance(cache, BaseCacheImpl): - raise TypeError("we expected cachebox caches, got %r" % (cache,)) - - def decorator(func: FT) -> FT: - if inspect.iscoroutinefunction(func): - wrapper = _async_cached_wrapper( - func, cache, key_maker, clear_reuse, callback, copy_level, True - ) - else: - wrapper = _cached_wrapper( - func, cache, key_maker, clear_reuse, callback, copy_level, True - ) - - return functools.update_wrapper(wrapper, func) # type: ignore[return-value] - - return decorator - - -def is_cached(func: object) -> bool: - """ - Check if a function/method cached by cachebox or not - """ - return hasattr(func, "cache") and isinstance(func.cache, BaseCacheImpl) diff --git a/python/tests/conftest.py b/python/tests/conftest.py deleted file mode 100644 index f46c034..0000000 --- a/python/tests/conftest.py +++ /dev/null @@ -1,30 +0,0 @@ -import cachebox -import pytest -import typing - - -@pytest.fixture( - scope="function", - params=[ - cachebox.Cache, - cachebox.FIFOCache, - cachebox.LFUCache, - cachebox.LRUCache, - cachebox.TTLCache, - cachebox.RRCache, - cachebox.VTTLCache, - ], -) -def random_cache_impl(request): - typ: typing.Type[cachebox.BaseCacheImpl] = request.param - - def inner(maxsize, iterable=None): - if 
typ is cachebox.TTLCache: - return typ(maxsize, ttl=10, iterable=iterable) - - if typ is cachebox.VTTLCache: - return typ(maxsize, ttl=10, iterable=iterable) - - return typ(maxsize, iterable=iterable) - - return inner diff --git a/python/tests/mixin.py b/python/tests/mixin.py deleted file mode 100644 index 6afb29b..0000000 --- a/python/tests/mixin.py +++ /dev/null @@ -1,570 +0,0 @@ -import dataclasses -import sys -import typing - -import pytest -from cachebox import BaseCacheImpl, TTLCache - - -@dataclasses.dataclass -class EQ: - def __init__(self, val: int) -> None: - self.val = val - - def __eq__(self, other: "EQ") -> bool: - return self.val == other.val - - def __hash__(self) -> int: - return self.val - - -@dataclasses.dataclass -class NoEQ: - def __init__(self, val: int) -> None: - self.val = val - - def __hash__(self) -> int: - return self.val - - -@dataclasses.dataclass -class Sized: - size: int - key: int - - def __sizeof__(self) -> int: - return self.size - - def __hash__(self) -> int: - return self.key - - def __eq__(self, other: object) -> bool: - if not isinstance(other, Sized): - return False - return self.key == other.key - - -class SizeError: - def __sizeof__(self) -> int: - raise ValueError("boom") - - -def getsizeof(obj, use_sys=True): # pragma: no cover - try: - if use_sys: - return sys.getsizeof(obj) - else: - return obj.__sizeof__() - except TypeError: # PyPy doesn't implement getsizeof or __sizeof__ - return len(obj) - - -class _TestMixin: # pragma: no cover - CACHE: typing.Type[BaseCacheImpl] - - KWARGS: dict = {} - NO_POLICY: bool = False - - def test__new__(self): - cache = self.CACHE(10, **self.KWARGS, capacity=8) - assert cache.maxsize == 10 - assert 20 > cache.capacity() >= 8, "capacity: {}".format(cache.capacity()) - assert cache.maxmemory == sys.maxsize - - cache = self.CACHE(20, **self.KWARGS, capacity=0) - assert cache.maxsize == 20 - assert 2 >= cache.capacity() >= 0 # This is depends on platform - assert cache.maxmemory == 
sys.maxsize - - cache = self.CACHE(20, **self.KWARGS, capacity=100) - assert cache.maxsize == 20 - assert 30 > cache.capacity() >= 20 - assert cache.maxmemory == sys.maxsize - - cache = self.CACHE(0, **self.KWARGS, capacity=8) - assert cache.maxsize == sys.maxsize - assert 20 > cache.capacity() >= 8 - assert cache.maxmemory == sys.maxsize - - cache = self.CACHE(10, **self.KWARGS, capacity=8, maxmemory=30) - assert cache.maxsize == 10 - assert 20 > cache.capacity() >= 8 - assert cache.maxmemory == 30 - - def test_overflow(self): - if not self.NO_POLICY: - return - - cache = self.CACHE(10, **self.KWARGS, capacity=10) - - for i in range(10): - cache[i] = i - - with pytest.raises(OverflowError): - cache["new-key"] = "new-value" - - def test_maxmemory_config(self): - cache = self.CACHE(10, **self.KWARGS, maxmemory=128) - assert cache.maxmemory == 128 - assert cache.memory() == 0 - - def test_maxmemory_enforced(self): - cache = self.CACHE(0, **self.KWARGS, maxmemory=100) - - k1 = Sized(10, 1) - v1 = Sized(80, 101) - cache[k1] = v1 - - k2 = Sized(10, 2) - v2 = Sized(80, 102) - - if self.NO_POLICY: - with pytest.raises(OverflowError): - cache[k2] = v2 - assert k1 in cache - else: - cache[k2] = v2 - assert k2 in cache - assert cache.memory() <= cache.maxmemory - - def test_maxmemory_enforced_base_types(self): - size_of_int = sys.getsizeof(1, 1) - - cache = self.CACHE(0, **self.KWARGS, maxmemory=size_of_int * 10) - - for i in range(5): - cache[i] = i - - if self.NO_POLICY: - with pytest.raises(OverflowError): - cache[10] = 10 - - assert 1 in cache - else: - cache[10] = 10 - assert 10 in cache - assert cache.memory() <= cache.maxmemory - - def test_update_overflow_preserves_entry(self): - cache = self.CACHE(0, **self.KWARGS, maxmemory=60) - - key = Sized(10, 1) - value = Sized(10, 101) - cache[key] = value - - too_big = Sized(100, 102) - with pytest.raises(OverflowError): - cache[key] = too_big - - assert cache[key].key == 101 - assert cache.memory() <= cache.maxmemory - - 
def test_update_sizeof_error_preserves_entry(self): - cache = self.CACHE(0, **self.KWARGS, maxmemory=60) - - key = Sized(10, 1) - value = Sized(10, 101) - cache[key] = value - - with pytest.raises(ValueError): - cache[key] = SizeError() - - assert cache[key].key == 101 - - def test___len__(self): - cache = self.CACHE(10, **self.KWARGS, capacity=10) - - assert len(cache) == 0 - assert cache.is_empty() ^ bool(cache) - - cache[0] = 0 - assert len(cache) == 1 - - cache[1] = 1 - cache[2] = 2 - cache[3] = 3 - assert len(cache) == 4 - - cache[0] = 10 - cache[1] = 5 - assert len(cache) == 4 - - for i in range(1000, 1000 + (10 - len(cache))): - cache[i] = i - - assert len(cache) == 10 - assert cache.is_full() - - def test___contains__(self): - cache = self.CACHE(1, **self.KWARGS, capacity=1) - - assert 1 not in cache - cache[1] = 1 - assert 1 in cache - - def test___setitem__(self): - cache = self.CACHE(10, **self.KWARGS, capacity=10) - - with pytest.raises(KeyError): - cache[1] - - cache[1] = 1 - cache[1] - cache[0] = 0 - cache[0] - cache[2] = 2 - cache[3] = 3 - - with pytest.raises(KeyError): - cache[4] - - del cache[1] - del cache[2] - del cache[3] - - with pytest.raises(KeyError): - del cache["error"] - - cache[0] - - with pytest.raises(KeyError): - cache[2] - - def test___repr__(self): - cache = self.CACHE(1000, **self.KWARGS, capacity=2) - assert repr(cache).startswith(self.CACHE.__module__ + "." 
+ self.CACHE.__name__) - - cache.update((i, i) for i in range(1000)) - assert str(cache) == repr(cache) - - def test_insert(self): - cache = self.CACHE(5, **self.KWARGS, capacity=5) - - assert cache.insert(1, 1) is None - assert cache.insert(1, 1) == 1 - assert cache.insert(1, 10) == 1 - assert cache.insert(1, 2) == 10 - - cache[5] = 5 - - assert cache.insert(5, "value") == 5 - assert cache.insert(5, 5) == "value" - - del cache[5] - - assert cache.insert(5, 5) is None - - def test_get(self): - cache = self.CACHE(5, **self.KWARGS, capacity=5) - - for i in range(5): - cache[i] = i - - assert cache.get(0, None) == 0 - assert cache.get(1, None) == 1 - assert cache.get("no-exists") is None - assert cache.get("no-exists", None) is None - assert cache.get("no-exists", 111) == 111 - - def test_pop(self): - cache = self.CACHE(5, **self.KWARGS, capacity=5) - - for i in range(5): - cache[i] = i * 2 - - assert cache.pop(1, None) == 2 - assert cache.get(1, None) is None - assert cache.pop(2, None) == 4 - assert cache.get(2, None) is None - - assert cache.pop(10, None) is None - assert cache.pop(10, 2) == 2 - - def test_setdefault(self): - obj = self.CACHE(2, **self.KWARGS, capacity=2) - - obj.setdefault("name", "nick") - obj["age"] = 18 - assert 18 == obj.setdefault("age", 1000) - assert 18 == obj["age"] - assert "nick" == obj["name"] - - if self.NO_POLICY: - with pytest.raises(OverflowError): - obj.setdefault("newkey", 0) - - def test_clear(self): - obj = self.CACHE(2, **self.KWARGS, capacity=2) - - obj[1] = 1 - obj[2] = 2 - assert 2 == len(obj) - - cap = getsizeof(obj, False) - obj.clear(reuse=True) - assert 0 == len(obj) - try: - assert getsizeof(obj, False) >= cap - except AssertionError as e: - # if not isinstance(obj, (LRUCache, LFUCache)): - raise e - - obj[1] = 1 - obj[2] = 2 - assert 2 == len(obj) - - cap = getsizeof(obj, False) - obj.clear(reuse=False) - assert 0 == len(obj) - # this is not stable and - # may increases the capacity! 
- try: - assert cap != getsizeof(obj, False) - except AssertionError as e: - # if not isinstance(obj, (LRUCache, LFUCache)): - raise e - - def test_update(self): - obj = self.CACHE(2, **self.KWARGS, capacity=2) - - obj.update({1: 1, 2: 2}) - assert 2 == len(obj) - assert 1 == obj[1] - assert 2 == obj[2] - - obj.update({1: 1, 2: 2}) - assert 2 == len(obj) - assert 1 == obj[1] - assert 2 == obj[2] - - obj.update([(1, "a"), (2, "b")]) - assert 2 == len(obj) - assert "a" == obj[1] - assert "b" == obj[2] - - if self.NO_POLICY: - with pytest.raises(OverflowError): - obj.update([(3, "a"), (4, "b")]) - else: - obj.update([(3, "a"), (4, "b")]) - - kw = self.KWARGS.copy() - kw["iterable"] = {1: 1, 2: 2} - obj = self.CACHE(2, **kw, capacity=2) - assert 2 == len(obj) - assert 1 == obj[1] - assert 2 == obj[2] - - kw["iterable"] = [(1, "a"), (2, "b")] - obj = self.CACHE(2, **kw, capacity=2) - assert 2 == len(obj) - assert "a" == obj[1] - assert "b" == obj[2] - - def test_eq_implemetation(self): - # see https://github.com/awolverp/cachebox/issues/5 - - size = 1000 - cache = self.CACHE(size, **self.KWARGS, capacity=size) - - for i in range(size): - cache.insert(NoEQ(val=i), i) - cache.get(NoEQ(val=i)) - - cache = self.CACHE(size, **self.KWARGS, capacity=size) - - for i in range(size): - cache.insert(EQ(val=i), i) - cache.get(EQ(val=i)) - - def test_iterators(self): - obj = self.CACHE(100, **self.KWARGS, capacity=100) - - for i in range(6): - obj[i] = i * 2 - - k = list(range(6)) - v = list(i * 2 for i in range(6)) - assert k == sorted(obj.keys()) - assert v == sorted(obj.values()) - assert list(zip(k, v)) == sorted(obj.items()) - - with pytest.raises(RuntimeError): - for i in obj: - del obj[i] - - for i in range(100): - obj[i] = i * 2 - - for i in range(50): - del obj[i] - - p = iter(obj) - next(p) - - obj.shrink_to_fit() - - with pytest.raises(RuntimeError): - next(p) - - obj = self.CACHE(0, **self.KWARGS) - obj.update({i: i for i in range(20)}) - - for key, value in obj.items(): 
- assert obj[key] == value - - try: - for key, value in obj.items(): - obj[key] = value * 2 - except RuntimeError: - if not isinstance(obj, TTLCache): - raise - - with pytest.raises(RuntimeError): - for key, value in obj.items(): - obj[str(key)] = value - - def test___eq__(self): - cache = self.CACHE(100, **self.KWARGS, capacity=100) - - with pytest.raises(TypeError): - cache > cache - - with pytest.raises(TypeError): - cache < cache - - with pytest.raises(TypeError): - cache >= cache - - with pytest.raises(TypeError): - cache <= cache - - assert cache == cache - assert not cache != cache - - for i in range(90): - cache[i] = i - - assert cache == cache - assert not cache != cache - - c2 = self.CACHE(100, **self.KWARGS, capacity=100) - for i in range(90): - c2[i] = i - - assert cache == c2 - assert not c2 != cache - - c2 = self.CACHE(1000, **self.KWARGS, capacity=100) - for i in range(90): - c2[i] = i - - assert not cache == c2 - assert c2 != cache - - def _test_pickle(self, check_order: typing.Callable): - import pickle - import tempfile - - c1 = self.CACHE(maxsize=0, **self.KWARGS) - c2 = pickle.loads(pickle.dumps(c1)) - assert c1 == c2 - assert c1.capacity() == c2.capacity() - - c1 = self.CACHE(maxsize=100, **self.KWARGS) - c1.update({i: i for i in range(10)}) - - for _ in range(10): - c1[0] - for _ in range(9): - c1[1] - for _ in range(8): - c1[2] - for _ in range(7): - c1[3] - for _ in range(6): - c1[4] - for _ in range(5): - c1[5] - for _ in range(4): - c1[6] - for _ in range(3): - c1[7] - for _ in range(2): - c1[8] - for _ in range(1): - c1[9] - - c2 = pickle.loads(pickle.dumps(c1)) - assert c1 == c2, f"{c1} - {c2}" - assert c1.capacity() == c2.capacity() - check_order(c1, c2) - - with tempfile.TemporaryFile("w+b") as fd: - c1 = self.CACHE(maxsize=100, **self.KWARGS) - c1.update({i: i for i in range(10)}) - - for _ in range(10): - c1[1] - for _ in range(9): - c1[2] - for _ in range(8): - c1[0] - for _ in range(7): - c1[3] - for _ in range(6): - c1[5] - for _ 
in range(5): - c1[4] - for _ in range(4): - c1[6] - for _ in range(3): - c1[7] - for _ in range(2): - c1[9] - for _ in range(1): - c1[8] - - pickle.dump(c1, fd) - fd.seek(0) - c2 = pickle.load(fd) - assert c1 == c2 - assert c1.capacity() == c2.capacity() - check_order(c1, c2) - - def test_copy(self): - import copy - - # shallow copy - c1 = self.CACHE(maxsize=0, **self.KWARGS) - c1.insert("dict", {}) - c2 = c1.copy() - - assert c2 == c1 - c2["dict"][1] = 1 - - assert c1["dict"][1] == 1 - - c2.insert(1, 1) - assert 1 not in c1 - - # deepcopy - c1 = self.CACHE(maxsize=0, **self.KWARGS) - c1.insert("dict", {}) - c2 = copy.deepcopy(c1) - - assert c2 == c1 - c2["dict"][1] = 1 - - assert 1 not in c1["dict"] - - c2.insert(1, 1) - assert 1 not in c1 - - def test_cache_type(self): - class AType: - pass - - cache = self.CACHE(maxsize=0, **self.KWARGS) - cache[AType] = AType - assert cache[AType] is AType diff --git a/python/tests/test_caches.py b/python/tests/test_caches.py deleted file mode 100644 index 7ec2ea7..0000000 --- a/python/tests/test_caches.py +++ /dev/null @@ -1,612 +0,0 @@ -import time -from datetime import timedelta - -import pytest -from cachebox import ( - Cache, - FIFOCache, - LFUCache, - LRUCache, - RRCache, - TTLCache, - VTTLCache, -) - -from .mixin import Sized, _TestMixin - - -class TestCache(_TestMixin): - CACHE = Cache - NO_POLICY = True - - def test_pickle(self): - self._test_pickle(lambda c1, c2: None) - - -class TestFIFOCache(_TestMixin): - CACHE = FIFOCache - - def test_policy(self): - cache = FIFOCache(5) - - cache[0] = 0 - cache[1] = 1 - cache[2] = 2 - - assert cache[0] == 0 - assert cache[1] == 1 - - assert cache.popitem() == (0, 0) - - cache[3] = 3 - - assert cache.popitem() == (1, 1) - assert cache.popitem() == (2, 2) - assert cache.popitem() == (3, 3) - - with pytest.raises(KeyError): - cache.popitem() - - for i in range(5): - cache[i] = i - - for i in range(5): - assert i in cache - - cache[10] = 10 - - assert 0 not in cache - assert 10 in 
cache - - assert cache.popitem() == (1, 1) - - del cache[2] - del cache[3] - del cache[4] - - assert cache.popitem() == (10, 10) - - def test_update_can_evict_self_on_maxmemory(self): - cache = FIFOCache(0, maxmemory=50) - - k1 = Sized(10, 1) - v1 = Sized(10, 101) - k2 = Sized(10, 2) - v2 = Sized(10, 102) - - cache[k1] = v1 - cache[k2] = v2 - - cache[k1] = Sized(40, 103) - - assert k1 not in cache - assert k2 in cache - assert cache.memory() <= cache.maxmemory - - def test_ordered_iterators(self): - obj = self.CACHE(100, **self.KWARGS, capacity=100) - - for i in range(6): - obj[i] = i * 2 - - k = list(range(6)) - v = list(i * 2 for i in range(6)) - assert k == list(obj.keys()) - assert v == list(obj.values()) - assert list(zip(k, v)) == list(obj.items()) - - def test_pickle(self): - def inner(c1, c2): - assert list(c1.items()) == list(c2.items()) - - self._test_pickle(inner) - - def test_first_last(self): - obj = self.CACHE(5, **self.KWARGS, capacity=5) - - for i in range(5): - obj[i] = i * 2 - - assert obj.first() == 0 - assert obj.last() == 4 - - obj[10] = 20 - - assert obj.first() == 1 - assert obj.last() == 10 - assert obj.first(-1) == obj.last() - assert obj.first(-10000) is None - - -class TestRRCache(_TestMixin): - CACHE = RRCache - - def test_popitem(self): - obj = RRCache(3) - with pytest.raises(KeyError): - obj.popitem() - with pytest.raises(KeyError): - obj.random_key() - - obj[1] = 1 - assert obj.random_key() == 1 - assert obj.popitem() == (1, 1) - - def test_pickle(self): - self._test_pickle(lambda c1, c2: None) - - -class TestLRUCache(_TestMixin): - CACHE = LRUCache - - def test_policy(self): - obj = self.CACHE(3) - - obj[1] = 1 - obj[2] = 2 - obj[3] = 3 - - assert (1, 1) == obj.popitem() - - obj[1] = 1 - obj[2] - - assert (3, 3) == obj.popitem() - - obj[4] = 4 - assert 1 == obj.get(1) - - obj[5] = 5 - assert 2 not in obj - - def test_ordered_iterators(self): - obj = self.CACHE(20, **self.KWARGS, capacity=20) - - for i in range(6): - obj[i] = i * 2 - 
- obj[1] - obj[5] - obj[3] = 7 - - k = [0, 2, 4, 1, 5, 3] - v = [0, 4, 8, 2, 10, 7] - assert k == list(obj.keys()) - assert v == list(obj.values()) - assert list(zip(k, v)) == list(obj.items()) - - def test_recently_used_funcs(self): - obj = LRUCache(10) - - for i in range(6): - obj[i] = i * 2 - - obj[1] - obj[5] - obj[3] = 7 - obj.peek(4) - - assert obj.peek(6) is None - - assert obj.most_recently_used() == 3 - assert obj.least_recently_used() == 0 - - def test_pickle(self): - def inner(c1, c2): - assert list(c1.items()) == list(c2.items()) - - self._test_pickle(inner) - - -class TestLFUCache(_TestMixin): - CACHE = LFUCache - - def test_policy(self): - obj = self.CACHE(5, {i: i for i in range(5)}) - - for i in range(5): - obj[i] = i - - for i in range(10): - assert 0 == obj[0] - for i in range(7): - assert 1 == obj[1] - for i in range(3): - assert 2 == obj[2] - for i in range(4): - assert 3 == obj[3] - for i in range(6): - assert 4 == obj[4] - - assert (2, 2) == obj.popitem() - assert (3, 3) == obj.popitem() - - for i in range(10): - assert 4 == obj.get(4) - - assert (1, 1) == obj.popitem() - - assert 2 == len(obj) - obj.clear() - - for i in range(5): - obj[i] = i - - assert [0, 1, 2, 3, 4] == list(obj.keys()) - - for i in range(10): - obj[0] += 1 - for i in range(7): - obj[1] += 1 - for i in range(3): - obj[2] += 1 - for i in range(4): - obj[3] += 1 - for i in range(6): - obj[4] += 1 - - obj[5] = 4 - assert [5, 3, 4, 1, 0] == list(obj.keys()) - - def test_items_with_frequency(self): - # no need to test completely items_with_frequency - # because it's tested in test_iterators - obj = LFUCache(10, {1: 2, 3: 4}) - for key, val, freq in obj.items_with_frequency(): - assert key in obj - assert val == obj[key] - assert isinstance(freq, int) - - def test_least_frequently_used(self): - obj = LFUCache(10) - - for i in range(5): - obj[i] = i * 2 - - for i in range(10): - obj[0] += 1 - for i in range(7): - obj[1] += 1 - for i in range(3): - obj[2] += 1 - for i in range(4): 
- obj[3] += 1 - for i in range(6): - obj[4] += 1 - - assert obj.least_frequently_used() == 2 - assert obj.least_frequently_used(1) == 3 - assert obj.least_frequently_used(4) == 0 - assert obj.least_frequently_used(5) is None - assert obj.least_frequently_used(5) is None - assert obj.least_frequently_used(-len(obj)) == obj.least_frequently_used() - assert obj.least_frequently_used(-1000) is None - - def test_pickle(self): - def inner(c1, c2): - assert list(c1.items()) == list(c2.items()) - - self._test_pickle(inner) - - -class TestTTLCache(_TestMixin): - CACHE = TTLCache - KWARGS = {"ttl": 10} - - def test__new__(self): - super().test__new__() - - cache = TTLCache(0, timedelta(minutes=2, seconds=20)) - assert cache.ttl == (2 * 60) + 20 - - with pytest.raises(ValueError): - TTLCache(0, -10) - - def test_policy(self): - obj = self.CACHE(2, 0.5) - assert obj.ttl == 0.5 - - obj.insert(0, 1) - time.sleep(0.8) - - with pytest.raises(KeyError): - obj[0] - - obj = self.CACHE(2, 20) - - obj.insert(0, 0) - obj.insert(1, 1) - obj.insert(2, 2) - - assert 0 not in obj - assert (1, 1) == obj.popitem() - - def test_update_with_ttl(self): - obj = self.CACHE(2, 0.5) - - # obj.update({1: 1, 2: 2, 3: 3}) - obj.update((i + 1, i + 1) for i in range(3)) - - with pytest.raises(KeyError): - obj[1] - - time.sleep(0.8) - - with pytest.raises(KeyError): - obj[2] - - with pytest.raises(KeyError): - obj[3] - - def test_policy_ttl_no_care(self): - cache = TTLCache(5, 10) - - cache[0] = 0 - cache[1] = 1 - cache[2] = 2 - - assert cache[0] == 0 - assert cache[1] == 1 - - assert cache.popitem() == (0, 0) - - cache[3] = 3 - - assert cache.popitem() == (1, 1) - assert cache.popitem() == (2, 2) - assert cache.popitem() == (3, 3) - - with pytest.raises(KeyError): - cache.popitem() - - for i in range(5): - cache[i] = i - - for i in range(5): - assert i in cache - - cache[10] = 10 - - assert 0 not in cache - assert 10 in cache - - assert cache.popitem() == (1, 1) - - del cache[2] - del cache[3] - del 
cache[4] - - assert cache.popitem() == (10, 10) - - def test_pickle(self): - def inner(c1, c2): - assert list(c1.items()) == list(c2.items()) - - self._test_pickle(inner) - - def test_first_last(self): - obj = self.CACHE(5, **self.KWARGS, capacity=5) - - for i in range(5): - obj[i] = i * 2 - - assert obj.first() == 0 - assert obj.last() == 4 - - obj[10] = 20 - - assert obj.first() == 1 - assert obj.last() == 10 - - def test_get_with_expire(self): - obj = TTLCache(2, 10) - - obj.insert(1, 1) - time.sleep(0.1) - value, dur = obj.get_with_expire(1) - assert 1 == value - assert 10 > dur > 9, "10 > dur > 9 failed [dur: %f]" % dur - - value, dur = obj.get_with_expire("no-exists") - assert value is None - assert 0 == dur - - value, dur = obj.get_with_expire("no-exists", "value") - assert "value" == value - assert 0 == dur - - def test_pop_with_expire(self): - obj = TTLCache(2, 10) - - obj.insert(1, 1) - time.sleep(0.1) - value, dur = obj.pop_with_expire(1) - assert 1 == value - assert 10 > dur > 9, "10 > dur > 9 failed [dur: %f]" % dur - - value, dur = obj.pop_with_expire("no-exists") - assert value is None - assert 0 == dur - - value, dur = obj.pop_with_expire("no-exists", "value") - assert "value" == value - assert 0 == dur - - def test_popitem_with_expire(self): - obj = TTLCache(2, 10) - - obj.insert(1, 1) - obj.insert(2, 2) - time.sleep(0.1) - key, value, dur = obj.popitem_with_expire() - assert (1, 1) == (key, value) - assert 10 > dur > 9, "10 > dur > 9 failed [dur: %f]" % dur - - key, value, dur = obj.popitem_with_expire() - assert (2, 2) == (key, value) - assert 10 > dur > 9, "10 > dur > 9 failed [dur: %f]" % dur - - with pytest.raises(KeyError): - obj.popitem_with_expire() - - def test_items_with_expire(self): - # no need to test completely items_with_expire - # because it's tested in test_iterators - obj = TTLCache(10, 3, {1: 2, 3: 4}) - for key, val, ttl in obj.items_with_expire(): - assert key in obj - assert val == obj[key] - assert isinstance(ttl, float) - - 
-class TestVTTLCache(_TestMixin): - CACHE = VTTLCache - - def test_policy(self): - obj = VTTLCache(2) - - obj.insert(0, 1, 0.5) - time.sleep(0.501) - - with pytest.raises(KeyError): - obj[0] - - obj.insert("name", "nick", 0.3) - obj.insert("age", 18, None) - time.sleep(0.301) - - with pytest.raises(KeyError): - obj["name"] - - del obj["age"] - - obj.insert(0, 0, 70) - obj.insert(1, 1, 60) - obj.insert(2, 2, 90) - - assert 1 not in obj - assert (0, 0) == obj.popitem() - - def test_update_with_ttl(self): - obj = VTTLCache(3) - - obj.update({1: 1, 2: 2, 3: 3}, 0.5) - time.sleep(0.501) - - with pytest.raises(KeyError): - obj[1] - - with pytest.raises(KeyError): - obj[2] - - with pytest.raises(KeyError): - obj[3] - - def test_get_with_expire(self): - obj = VTTLCache(2) - - obj.insert(1, 1, 10) - time.sleep(0.1) - value, dur = obj.get_with_expire(1) - assert 1 == value - assert 10 > dur > 9, "10 > dur > 9 failed [dur: %f]" % dur - - value, dur = obj.get_with_expire("no-exists") - assert value is None - assert 0 == dur - - value, dur = obj.get_with_expire("no-exists", "value") - assert "value" == value - assert 0 == dur - - def test_pop_with_expire(self): - obj = VTTLCache(2) - - obj.insert(1, 1, 10) - time.sleep(0.1) - value, dur = obj.pop_with_expire(1) - assert 1 == value - assert 10 > dur > 9, "10 > dur > 9 failed [dur: %f]" % dur - - value, dur = obj.pop_with_expire("no-exists") - assert value is None - assert 0 == dur - - value, dur = obj.pop_with_expire("no-exists", "value") - assert "value" == value - assert 0 == dur - - def test_popitem_with_expire(self): - obj = VTTLCache(2) - - obj.insert(1, 1, 10) - obj.insert(2, 2, 6) - time.sleep(0.1) - key, value, dur = obj.popitem_with_expire() - assert (2, 2) == (key, value) - assert 6 > dur > 5, "6 > dur > 5 failed [dur: %f]" % dur - - key, value, dur = obj.popitem_with_expire() - assert (1, 1) == (key, value) - assert 10 > dur > 9, "10 > dur > 9 failed [dur: %f]" % dur - - with pytest.raises(KeyError): - 
obj.popitem_with_expire() - - def test_pickle(self): - def inner(c1, c2): - assert list(c1.items()) == list(c2.items()) - - import pickle - import tempfile - - c1 = self.CACHE(maxsize=0, **self.KWARGS) - c2 = pickle.loads(pickle.dumps(c1)) - assert c1 == c2 - assert c1.capacity() == c2.capacity() - - c1 = self.CACHE(maxsize=100, **self.KWARGS) - - for i in range(10): - c1.insert(i, i * 2, i + 2) - - c2 = pickle.loads(pickle.dumps(c1)) - assert c1 == c2 - assert c1.capacity() == c2.capacity() - inner(c1, c2) - - with tempfile.TemporaryFile("w+b") as fd: - c1 = self.CACHE(maxsize=100, **self.KWARGS) - c1.update({i: i for i in range(10)}) - - for i in range(10): - c1.insert(i, i * 2, i + 2) - - pickle.dump(c1, fd) - fd.seek(0) - c2 = pickle.load(fd) - assert c1 == c2 - assert c1.capacity() == c2.capacity() - inner(c1, c2) - - c1 = self.CACHE(maxsize=100, **self.KWARGS) - - for i in range(10): - c1.insert(i, i * 2, i + 0.5) - - time.sleep(0.51) - - c2 = pickle.loads(pickle.dumps(c1)) - - assert len(c2) == len(c1) - assert abs(c2.capacity() - c1.capacity()) < 2 - inner(c1, c2) - - def test_items_with_expire(self): - # no need to test completely items_with_expire - # because it's tested in test_iterators - obj = VTTLCache(10, {1: 2, 3: 4}, ttl=10) - for key, val, ttl in obj.items_with_expire(): - assert key in obj - assert val == obj[key] - assert isinstance(ttl, float) diff --git a/python/tests/test_concurrency.py b/python/tests/test_concurrency.py deleted file mode 100644 index 2935ee1..0000000 --- a/python/tests/test_concurrency.py +++ /dev/null @@ -1,108 +0,0 @@ -from cachebox import cached, LRUCache -from concurrent import futures -import asyncio -import pytest -import time - - -def test_threading_return(): - calls = 0 - - @cached(LRUCache(0)) - def func(): - nonlocal calls - time.sleep(1) - calls += 1 - return "Hello" - - with futures.ThreadPoolExecutor(max_workers=10) as executor: - future_list = [executor.submit(func) for _ in range(10)] - for future in 
futures.as_completed(future_list): - assert future.result() == "Hello" - - assert calls == 1 - - -def test_threading_exc(): - calls = 0 - - @cached(LRUCache(0)) - def func(): - nonlocal calls - time.sleep(1) - calls += 1 - raise RuntimeError - - with futures.ThreadPoolExecutor(max_workers=5) as executor: - future_list = [executor.submit(func) for _ in range(5)] - for future in futures.as_completed(future_list): - assert isinstance(future.exception(), RuntimeError) - - assert calls == 1 - - with futures.ThreadPoolExecutor(max_workers=5) as executor: - future_list = [executor.submit(func) for _ in range(5)] - for future in futures.as_completed(future_list): - assert isinstance(future.exception(), RuntimeError) - - assert calls == 2 - - -@pytest.mark.asyncio -async def test_asyncio_return(): - calls = 0 - - @cached(LRUCache(0)) - async def func(): - nonlocal calls - await asyncio.sleep(1) - calls += 1 - return "Hello" - - await asyncio.gather( - func(), - func(), - func(), - func(), - func(), - ) - - assert calls == 1 - - -@pytest.mark.asyncio -async def test_asyncio_exc(): - calls = 0 - - @cached(LRUCache(0)) - async def func(): - nonlocal calls - await asyncio.sleep(1) - calls += 1 - raise RuntimeError - - tasks = await asyncio.gather( - func(), - func(), - func(), - func(), - func(), - return_exceptions=True, - ) - for future in tasks: - assert isinstance(future, RuntimeError) - - assert calls == 1 - - tasks = await asyncio.gather( - func(), - func(), - func(), - func(), - func(), - return_exceptions=True, - ) - for future in tasks: - assert isinstance(future, RuntimeError) - - assert calls == 2 diff --git a/python/tests/test_utils.py b/python/tests/test_utils.py deleted file mode 100644 index 6d6dc7c..0000000 --- a/python/tests/test_utils.py +++ /dev/null @@ -1,415 +0,0 @@ -from cachebox import ( - Frozen, - LRUCache, - BaseCacheImpl, - cached, - make_typed_key, - make_key, - EVENT_HIT, - EVENT_MISS, - is_cached, -) -import asyncio -import pytest -import time - - 
-def test_frozen(random_cache_impl: type[BaseCacheImpl]): - cache = random_cache_impl(10, {i: i for i in range(8)}) - f = Frozen(cache) - - assert f.maxsize == cache.maxsize - - with pytest.raises(TypeError): - f[0] = 0 - - with pytest.raises(TypeError): - f.pop(0) - - with pytest.raises(TypeError): - f.popitem() - - assert len(f) == 8 - assert len(f) == len(cache) - cache.insert(9, 9) - assert len(f) == 9 - assert len(f) == len(cache) - - f = Frozen(cache, ignore=True) - f.popitem() - - -def test_cached(random_cache_impl: type[BaseCacheImpl]): - obj = random_cache_impl(3) - - @cached(obj) - def factorial(n): - fact = 1 - for num in range(2, n + 1): - fact *= num - - time.sleep(0.1) # need for testing - return fact - - perf_1 = time.perf_counter() - factorial(15) - perf_1 = time.perf_counter() - perf_1 - - assert factorial.cache_info().length == 1 - assert factorial.cache_info().misses == 1 - - perf_2 = time.perf_counter() - factorial(15) - perf_2 = time.perf_counter() - perf_2 - - assert perf_1 > perf_2 - assert factorial.cache_info().hits == 1 - - factorial.cache_clear() - assert factorial.cache_info().hits == 0 - assert factorial.cache_info().misses == 0 - - perf_3 = time.perf_counter() - factorial(15) - perf_3 = time.perf_counter() - perf_3 - assert perf_3 > perf_2 - - # test cachebox__ignore - factorial.cache_clear() - assert len(factorial.cache) == 0 - factorial(15, cachebox__ignore=True) - assert len(factorial.cache) == 0 - - -def test_key_makers(random_cache_impl: type[BaseCacheImpl]): - @cached(random_cache_impl(125), key_maker=make_key) - def func(a, b, c): - return a, b, c - - func(1, 2, 3) - func(1.0, 2, 3.0) - func(3, 2, 1) - - assert len(func.cache) == 2 - - @cached(random_cache_impl(125), key_maker=make_typed_key) - def func(a, b, c): - return a, b, c - - func(1, 2, 3) - func(1.0, 2, 3.0) - func(3, 2, 1) - - assert len(func.cache) == 3 - - -@pytest.mark.asyncio -async def test_async_cached(random_cache_impl: type[BaseCacheImpl]): - obj = 
random_cache_impl(3) - - @cached(obj) - async def factorial(n: int, _: str): - fact = 1 - for num in range(2, n + 1): - fact *= num - - await asyncio.sleep(0.1) # need for testing - return fact - - perf_1 = time.perf_counter() - await factorial(15, "cachebox") - perf_1 = time.perf_counter() - perf_1 - - assert factorial.cache_info().length == 1 - assert factorial.cache_info().misses == 1 - - perf_2 = time.perf_counter() - await factorial(15, "cachebox") - perf_2 = time.perf_counter() - perf_2 - - assert perf_1 > perf_2 - assert factorial.cache_info().hits == 1 - - factorial.cache_clear() - assert factorial.cache_info().hits == 0 - assert factorial.cache_info().misses == 0 - - perf_3 = time.perf_counter() - await factorial(15, "cachebox") - perf_3 = time.perf_counter() - perf_3 - assert perf_3 > perf_2 - - # test cachebox__ignore - factorial.cache_clear() - assert len(factorial.cache) == 0 - await factorial(15, "me", cachebox__ignore=True) - assert len(factorial.cache) == 0 - - -def test_cachedmethod(): - class TestCachedMethod: - def __init__(self, num) -> None: - self.num = num - - @cached(None) - def method(self, char: str): - assert type(self) is TestCachedMethod - return char * self.num - - cls = TestCachedMethod(10) - assert cls.method("a") == ("a" * 10) - - cls = TestCachedMethod(2) - assert cls.method("a") == ("a" * 2) - - -@pytest.mark.asyncio -async def test_async_cachedmethod(random_cache_impl: type[BaseCacheImpl]): - class TestCachedMethod: - def __init__(self, num) -> None: - self.num = num - - @cached(random_cache_impl(0)) - async def method(self, char: str): - assert type(self) is TestCachedMethod - return char * self.num - - cls = TestCachedMethod(10) - assert (await cls.method("a")) == ("a" * 10) - - -def test_callback(random_cache_impl: type[BaseCacheImpl]): - obj = random_cache_impl(3) - - called = list() - - @cached( - obj, - key_maker=lambda args, _: args[0], - callback=lambda event, key, value: called.append((event, key, value)), - ) - def 
factorial(n: int, /): - fact = 1 - for num in range(2, n + 1): - fact *= num - - return fact - - assert factorial(5) == 120 - assert len(called) == 1 - assert called[0] == (EVENT_MISS, 5, 120) - - assert factorial(5) == 120 - assert len(called) == 2 - assert called[1] == (EVENT_HIT, 5, 120) - - assert factorial(3) == 6 - assert len(called) == 3 - assert called[2] == (EVENT_MISS, 3, 6) - - assert is_cached(factorial) - - -async def _test_async_callback(random_cache_impl: type[BaseCacheImpl]): - obj = random_cache_impl(3) - - called = list() - - async def _callback(event, key, value): - called.append((event, key, value)) - - @cached(obj, key_maker=lambda args, _: args[0], callback=_callback) - async def factorial(n: int, /): - fact = 1 - for num in range(2, n + 1): - fact *= num - - return fact - - assert await factorial(5) == 120 - assert len(called) == 1 - assert called[0] == (EVENT_MISS, 5, 120) - - assert await factorial(5) == 120 - assert len(called) == 2 - assert called[1] == (EVENT_HIT, 5, 120) - - assert await factorial(3) == 6 - assert len(called) == 3 - assert called[2] == (EVENT_MISS, 3, 6) - - assert is_cached(factorial) - assert not is_cached(_callback) - - -def test_async_callback(random_cache_impl: type[BaseCacheImpl]): - try: - loop = asyncio.get_running_loop() - except RuntimeError: - loop = asyncio.new_event_loop() - - loop.run_until_complete(_test_async_callback(random_cache_impl)) - - -def test_copy_level(random_cache_impl: type[BaseCacheImpl]): - class A: - def __init__(self, c: int) -> None: - self.c = c - - @cached(random_cache_impl(0)) - def func(c: int) -> A: - return A(c) - - result = func(1) - assert result.c == 1 - result.c = 2 - - result = func(1) - assert result.c == 2 # !!! 
- - @cached(random_cache_impl(0), copy_level=2) - def func(c: int) -> A: - return A(c) - - result = func(1) - assert result.c == 1 - result.c = 2 - - result = func(1) - assert result.c == 1 # :) - - -def test_classmethod(): - class MyClass: - def __init__(self, num: int) -> None: - self.num = num - - @classmethod - @cached(None, copy_level=2) - def new(cls, num: int): - return cls(num) - - a = MyClass.new(1) - assert isinstance(a, MyClass) and a.num == 1 - - -def test_staticmethod(): - class MyClass: - def __init__(self, num: int) -> None: - self.num = num - - @staticmethod - @cached(None, copy_level=2) - def new(num: int): - return num - - a = MyClass.new(1) - assert isinstance(a, int) and a == 1 - - -def test_new_cached_method(random_cache_impl: type[BaseCacheImpl]): - class Test: - def __init__(self, num) -> None: - self.num = num - self._cache = random_cache_impl(20) - - @cached(lambda self: self._cache) - def method(self, char: str): - assert type(self) is Test - return char * self.num - - for i in range(10): - cls = Test(i) - assert cls.method("a") == ("a" * i) - - -def test_nested_cached_shared_cache(random_cache_impl: type[BaseCacheImpl]): - obj = random_cache_impl(10) - - @cached(obj, key_maker=make_typed_key) - def func_inner(a: int, b: int): - return a + b - - @cached(obj, key_maker=make_key) - def func_outer(a: int, b: int): - return f"{a} + {b} = {func_inner(a, b)}" - - assert func_outer(1, 2) == "1 + 2 = 3" - assert func_outer(1, 2) == "1 + 2 = 3" - assert func_outer(1, 2) == "1 + 2 = 3" - assert func_outer(1, 2) == "1 + 2 = 3" - assert func_outer(2, 3) == "2 + 3 = 5" - assert func_outer(a=2, b=3) == "2 + 3 = 5" - - -def test_recursive_cached(random_cache_impl: type[BaseCacheImpl]): - obj = random_cache_impl(10) - - @cached(obj) - def factorial(n): - if n < 0: - raise ValueError("فاکتوریل برای اعداد منفی تعریف نشده است.") - if n == 0 or n == 1: - return 1 - else: - return n * factorial(n - 1) - - assert factorial(10) == 3628800 - assert factorial(5) 
== 120 - assert factorial(10) == 3628800 - assert factorial(5) == 120 - assert factorial(10) == 3628800 - assert factorial(2) == 2 - - -def test_recursive_threading_cached(): - import threading - - obj = LRUCache(10) - - @cached(obj) - def factorial(n): - if n < 0: - raise ValueError("فاکتوریل برای اعداد منفی تعریف نشده است.") - if n == 0 or n == 1: - return 1 - else: - return n * factorial(n - 1) - - threads = list( - map( - lambda x: x.start() or x, - (threading.Thread(target=factorial, args=(10,), name=str(i)) for i in range(10)), - ) - ) - for t in threads: - t.join(timeout=60) - - -@pytest.mark.asyncio -async def test_recursive_asyncio_cached(): - obj = LRUCache(10) - - @cached(obj) - async def factorial(n) -> int: - if n < 0: - raise ValueError("فاکتوریل برای اعداد منفی تعریف نشده است.") - if n == 0 or n == 1: - return 1 - else: - return n * (await factorial(n - 1)) - - result = await asyncio.wait_for( - asyncio.gather( - factorial(10), - factorial(10), - factorial(10), - factorial(10), - factorial(10), - factorial(10), - factorial(10), - factorial(10), - ), - 10, - ) - assert result == ([3628800] * 8) diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..63df91e --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1 @@ +imports_granularity = "Item" diff --git a/src/bridge/cache.rs b/src/bridge/cache.rs deleted file mode 100644 index 4b52a6b..0000000 --- a/src/bridge/cache.rs +++ /dev/null @@ -1,315 +0,0 @@ -use crate::common::Entry; -use crate::common::ObservedIterator; -use crate::common::PreHashObject; - -#[cfg_attr(Py_3_9, pyo3::pyclass(module = "cachebox._core", frozen))] -#[cfg_attr( - not(Py_3_9), - pyo3::pyclass(module = "cachebox._core", frozen, immutable_type) -)] -pub struct Cache { - raw: crate::common::Mutex, -} - -#[allow(non_camel_case_types)] -#[pyo3::pyclass(module = "cachebox._core")] -pub struct cache_items { - pub ptr: ObservedIterator, - pub iter: crate::common::Mutex< - hashbrown::raw::RawIter<(PreHashObject, pyo3::Py, 
usize)>, - >, -} - -#[pyo3::pymethods] -impl Cache { - #[new] - #[pyo3(signature=(maxsize, *, capacity=0, maxmemory=0))] - fn __new__(maxsize: usize, capacity: usize, maxmemory: usize) -> pyo3::PyResult { - let raw = crate::policies::nopolicy::NoPolicy::new(maxsize, capacity, maxmemory)?; - - let self_ = Self { - raw: crate::common::Mutex::new(raw), - }; - Ok(self_) - } - - fn _state(&self) -> usize { - self.raw.lock().observed.get() as usize - } - - fn maxsize(&self) -> usize { - self.raw.lock().maxsize() - } - - fn maxmemory(&self) -> usize { - self.raw.lock().maxmemory() - } - - fn memory(&self) -> usize { - self.raw.lock().memory() - } - - fn capacity(&self) -> usize { - self.raw.lock().capacity() - } - - fn __len__(&self) -> usize { - self.raw.lock().len() - } - - fn __sizeof__(&self) -> usize { - let lock = self.raw.lock(); - lock.capacity() - * (size_of::() + size_of::() + size_of::()) - } - - fn __contains__( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult { - let key = PreHashObject::from_pyobject(py, key)?; - let lock = self.raw.lock(); - - match lock.lookup(py, &key)? { - Some(_) => Ok(true), - None => Ok(false), - } - } - - fn is_empty(&self) -> bool { - self.raw.lock().is_empty() - } - - fn is_full(&self) -> bool { - self.raw.lock().is_full() - } - - fn insert( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - value: pyo3::Py, - ) -> pyo3::PyResult>> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry_with_slot(py, &key)? { - Entry::Occupied(entry) => Ok(Some(entry.update(py, value)?)), - Entry::Absent(entry) => { - entry.insert(py, key, value)?; - Ok(None) - } - } - } - - fn get( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult> { - let key = PreHashObject::from_pyobject(py, key)?; - let lock = self.raw.lock(); - - match lock.lookup(py, &key)? 
{ - Some(val) => Ok(val.clone_ref(py)), - None => Err(pyo3::PyErr::new::(key.obj)), - } - } - - fn update( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - iterable: pyo3::Py, - ) -> pyo3::PyResult<()> { - if slf.as_ptr() == iterable.as_ptr() { - return Ok(()); - } - - let mut lock = slf.raw.lock(); - lock.extend(py, iterable) - } - - fn __richcmp__( - slf: pyo3::PyRef<'_, Self>, - other: pyo3::Py, - op: pyo3::class::basic::CompareOp, - ) -> pyo3::PyResult { - let other = other.extract::>(slf.py())?; - - match op { - pyo3::class::basic::CompareOp::Eq => { - if slf.as_ptr() == other.as_ptr() { - return Ok(true); - } - let t1 = slf.raw.lock(); - let t2 = other.raw.lock(); - t1.equal(slf.py(), &t2) - } - pyo3::class::basic::CompareOp::Ne => { - if slf.as_ptr() == other.as_ptr() { - return Ok(false); - } - - let t1 = slf.raw.lock(); - let t2 = other.raw.lock(); - t1.equal(slf.py(), &t2).map(|r| !r) - } - _ => Err(pyo3::PyErr::new::( - "only '==' or '!=' are supported", - )), - } - } - - fn remove( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry(py, &key)? { - Entry::Occupied(entry) => { - let (_, value, _) = entry.remove(); - Ok(value) - } - Entry::Absent(_) => Err(pyo3::PyErr::new::(key.obj)), - } - } - - fn clear(&self, reuse: bool) { - let mut lock = self.raw.lock(); - lock.clear(); - - if !reuse { - lock.shrink_to_fit(); - } - } - - fn shrink_to_fit(&self) { - let mut lock = self.raw.lock(); - lock.shrink_to_fit(); - } - - fn setdefault( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - default: pyo3::Py, - ) -> pyo3::PyResult> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry(py, &key)? 
{ - Entry::Occupied(entry) => { - let (_, ref value, _) = entry.into_value(); - Ok(value.clone_ref(py)) - } - Entry::Absent(entry) => { - entry.insert(py, key, default.clone_ref(py))?; - Ok(default) - } - } - } - - fn items(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult> { - let lock = slf.raw.lock(); - let state = lock.observed.get(); - let iter = lock.iter(); - - let result = cache_items { - ptr: ObservedIterator::new(slf.as_ptr(), state), - iter: crate::common::Mutex::new(iter), - }; - - pyo3::Py::new(slf.py(), result) - } - - fn __getnewargs__(&self) -> (usize,) { - (0,) - } - - fn __getstate__(&self, py: pyo3::Python<'_>) -> pyo3::PyResult> { - let lock = self.raw.lock(); - unsafe { - let state = { - let mp = pyo3::ffi::PyDict_New(); - - if mp.is_null() { - return Err(pyo3::PyErr::fetch(py)); - } - - for bucket in lock.iter() { - let (key, val, _) = bucket.as_ref(); - // SAFETY: we don't need to check error because we sure about key that is hashable. - pyo3::ffi::PyDict_SetItem(mp, key.obj.as_ptr(), val.as_ptr()); - } - - let maxsize = pyo3::ffi::PyLong_FromSize_t(lock.maxsize()); - let capacity = pyo3::ffi::PyLong_FromSize_t(lock.capacity()); - let maxmemory = pyo3::ffi::PyLong_FromSize_t(lock.maxmemory()); - - tuple!( - py, - 4, - 0 => maxsize, - 1 => mp, - 2 => capacity, - 3 => maxmemory, - )? 
- }; - Ok(pyo3::Py::from_owned_ptr(py, state)) - } - } - - pub fn __setstate__( - &self, - py: pyo3::Python<'_>, - state: pyo3::Py, - ) -> pyo3::PyResult<()> { - let mut lock = self.raw.lock(); - lock.from_pickle(py, state.as_ptr()) - } - - pub fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { - for value in self.raw.lock().iter() { - let (key, value, _) = unsafe { value.as_ref() }; - visit.call(&key.obj)?; - visit.call(value)?; - } - Ok(()) - } - - pub fn __clear__(&self) { - let mut lock = self.raw.lock(); - lock.clear() - } -} - -#[pyo3::pymethods] -impl cache_items { - fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { - slf - } - - #[allow(unused_mut)] - fn __next__(mut slf: pyo3::PyRefMut<'_, Self>) -> pyo3::PyResult<*mut pyo3::ffi::PyObject> { - let mut iter = slf.iter.lock(); - - slf.ptr.proceed(slf.py())?; - - if let Some(x) = iter.next() { - let (key, val, _) = unsafe { x.as_ref() }; - - tuple!( - slf.py(), - 2, - 0 => key.obj.clone_ref(slf.py()).into_ptr(), - 1 => val.clone_ref(slf.py()).into_ptr(), - ) - } else { - Err(pyo3::PyErr::new::(())) - } - } -} diff --git a/src/bridge/fifocache.rs b/src/bridge/fifocache.rs deleted file mode 100644 index b09cc3c..0000000 --- a/src/bridge/fifocache.rs +++ /dev/null @@ -1,349 +0,0 @@ -use crate::common::Entry; -use crate::common::ObservedIterator; -use crate::common::PreHashObject; - -#[cfg_attr(Py_3_9, pyo3::pyclass(module = "cachebox._core", frozen))] -#[cfg_attr( - not(Py_3_9), - pyo3::pyclass(module = "cachebox._core", frozen, immutable_type) -)] -pub struct FIFOCache { - raw: crate::common::Mutex, -} - -#[allow(non_camel_case_types)] -#[pyo3::pyclass(module = "cachebox._core")] -pub struct fifocache_items { - pub ptr: ObservedIterator, - pub iter: crate::common::Mutex, -} - -#[pyo3::pymethods] -impl FIFOCache { - #[new] - #[pyo3(signature=(maxsize, *, capacity=0, maxmemory=0))] - fn __new__(maxsize: usize, capacity: usize, maxmemory: usize) -> 
pyo3::PyResult { - let raw = crate::policies::fifo::FIFOPolicy::new(maxsize, capacity, maxmemory)?; - - let self_ = Self { - raw: crate::common::Mutex::new(raw), - }; - Ok(self_) - } - - fn _state(&self) -> u16 { - self.raw.lock().observed.get() - } - - fn maxsize(&self) -> usize { - self.raw.lock().maxsize() - } - - fn maxmemory(&self) -> usize { - self.raw.lock().maxmemory() - } - - fn memory(&self) -> usize { - self.raw.lock().memory() - } - - fn capacity(&self) -> usize { - self.raw.lock().capacity().0 - } - - fn __len__(&self) -> usize { - self.raw.lock().len() - } - - fn __sizeof__(&self) -> usize { - let lock = self.raw.lock(); - let capacity = lock.capacity(); - - capacity.0 * size_of::() - + capacity.1 - * (size_of::() - + size_of::() - + size_of::()) - } - - fn __contains__( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult { - let key = PreHashObject::from_pyobject(py, key)?; - let lock = self.raw.lock(); - - match lock.lookup(py, &key)? { - Some(_) => Ok(true), - None => Ok(false), - } - } - - fn is_empty(&self) -> bool { - self.raw.lock().is_empty() - } - - fn is_full(&self) -> bool { - self.raw.lock().is_full() - } - - fn insert( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - value: pyo3::Py, - ) -> pyo3::PyResult>> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry_with_slot(py, &key)? { - Entry::Occupied(entry) => Ok(Some(entry.update(py, value)?)), - Entry::Absent(entry) => { - entry.insert(py, key, value)?; - Ok(None) - } - } - } - - fn get( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult> { - let key = PreHashObject::from_pyobject(py, key)?; - let lock = self.raw.lock(); - - match lock.lookup(py, &key)? 
{ - Some(val) => Ok(val.clone_ref(py)), - None => Err(pyo3::PyErr::new::(key.obj)), - } - } - - fn update( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - iterable: pyo3::Py, - ) -> pyo3::PyResult<()> { - if slf.as_ptr() == iterable.as_ptr() { - return Ok(()); - } - - let mut lock = slf.raw.lock(); - lock.extend(py, iterable) - } - - fn __richcmp__( - slf: pyo3::PyRef<'_, Self>, - other: pyo3::Py, - op: pyo3::class::basic::CompareOp, - ) -> pyo3::PyResult { - let other = other.extract::>(slf.py())?; - - match op { - pyo3::class::basic::CompareOp::Eq => { - if slf.as_ptr() == other.as_ptr() { - return Ok(true); - } - - let t1 = slf.raw.lock(); - let t2 = other.raw.lock(); - t1.equal(slf.py(), &t2) - } - pyo3::class::basic::CompareOp::Ne => { - if slf.as_ptr() == other.as_ptr() { - return Ok(false); - } - - let t1 = slf.raw.lock(); - let t2 = other.raw.lock(); - t1.equal(slf.py(), &t2).map(|r| !r) - } - _ => Err(pyo3::PyErr::new::( - "only '==' or '!=' are supported", - )), - } - } - - fn remove( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry(py, &key)? { - Entry::Occupied(entry) => { - let (_, value, _) = entry.remove(); - Ok(value) - } - Entry::Absent(_) => Err(pyo3::PyErr::new::(key.obj)), - } - } - - fn popitem( - &self, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult<(pyo3::Py, pyo3::Py)> { - let mut lock = self.raw.lock(); - - match lock.popitem(py)? 
{ - Some((key, val, _)) => Ok((key.obj, val)), - None => Err(pyo3::PyErr::new::(())), - } - } - - fn clear(&self, reuse: bool) { - let mut lock = self.raw.lock(); - lock.clear(); - - if !reuse { - lock.shrink_to_fit(); - } - } - - fn shrink_to_fit(&self) { - let mut lock = self.raw.lock(); - lock.shrink_to_fit(); - } - - fn setdefault( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - default: pyo3::Py, - ) -> pyo3::PyResult> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry(py, &key)? { - Entry::Occupied(entry) => { - let (_, ref value, _) = entry.into_value(); - Ok(value.clone_ref(py)) - } - Entry::Absent(entry) => { - entry.insert(py, key, default.clone_ref(py))?; - Ok(default) - } - } - } - - fn items(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult> { - let lock = slf.raw.lock(); - let state = lock.observed.get(); - let iter = lock.iter(); - - let result = fifocache_items { - ptr: ObservedIterator::new(slf.as_ptr(), state), - iter: crate::common::Mutex::new(iter), - }; - - pyo3::Py::new(slf.py(), result) - } - - fn get_index(&self, py: pyo3::Python<'_>, index: usize) -> Option> { - let lock = self.raw.lock(); - - lock.get_index(index) - .map(|(key, _, _)| key.obj.clone_ref(py)) - } - - fn __getnewargs__(&self) -> (usize,) { - (0,) - } - - fn __getstate__(&self, py: pyo3::Python<'_>) -> pyo3::PyResult> { - let lock = self.raw.lock(); - - let state = unsafe { - let list = pyo3::ffi::PyList_New(0); - if list.is_null() { - return Err(pyo3::PyErr::fetch(py)); - } - - for (hk, val, _) in lock.entries_iter() { - let tp = tuple!( - py, - 2, - 0 => hk.obj.clone_ref(py).as_ptr(), - 1 => val.clone_ref(py).as_ptr(), - ); - - if let Err(x) = tp { - pyo3::ffi::Py_DECREF(list); - return Err(x); - } - - if pyo3::ffi::PyList_Append(list, tp.unwrap_unchecked()) == -1 { - pyo3::ffi::Py_DECREF(list); - return Err(pyo3::PyErr::fetch(py)); - } - } - - let maxsize = pyo3::ffi::PyLong_FromSize_t(lock.maxsize()); - let capacity 
= pyo3::ffi::PyLong_FromSize_t(lock.capacity().0); - let maxmemory = pyo3::ffi::PyLong_FromSize_t(lock.maxmemory()); - - tuple!( - py, - 4, - 0 => maxsize, - 1 => list, - 2 => capacity, - 3 => maxmemory, - )? - }; - - Ok(unsafe { pyo3::Py::from_owned_ptr(py, state) }) - } - - pub fn __setstate__( - &self, - py: pyo3::Python<'_>, - state: pyo3::Py, - ) -> pyo3::PyResult<()> { - let mut lock = self.raw.lock(); - lock.from_pickle(py, state.as_ptr()) - } - - pub fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { - for value in self.raw.lock().entries_iter() { - visit.call(&value.0.obj)?; - visit.call(&value.1)?; - } - Ok(()) - } - - pub fn __clear__(&self) { - let mut lock = self.raw.lock(); - lock.clear() - } -} - -#[pyo3::pymethods] -impl fifocache_items { - fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { - slf - } - - #[allow(unused_mut)] - fn __next__(mut slf: pyo3::PyRefMut<'_, Self>) -> pyo3::PyResult<*mut pyo3::ffi::PyObject> { - let mut iter = slf.iter.lock(); - - slf.ptr.proceed(slf.py())?; - - if let Some(x) = iter.next() { - let (key, val, _) = unsafe { x.as_ref() }; - - tuple!( - slf.py(), - 2, - 0 => key.obj.clone_ref(slf.py()).into_ptr(), - 1 => val.clone_ref(slf.py()).into_ptr(), - ) - } else { - Err(pyo3::PyErr::new::(())) - } - } -} diff --git a/src/bridge/lfucache.rs b/src/bridge/lfucache.rs deleted file mode 100644 index f480f19..0000000 --- a/src/bridge/lfucache.rs +++ /dev/null @@ -1,377 +0,0 @@ -use crate::common::Entry; -use crate::common::ObservedIterator; -use crate::common::PreHashObject; - -#[cfg_attr(Py_3_9, pyo3::pyclass(module = "cachebox._core", frozen))] -#[cfg_attr( - not(Py_3_9), - pyo3::pyclass(module = "cachebox._core", frozen, immutable_type) -)] -pub struct LFUCache { - raw: crate::common::Mutex, -} - -#[allow(non_camel_case_types)] -#[pyo3::pyclass(module = "cachebox._core")] -pub struct lfucache_items { - pub ptr: ObservedIterator, - pub iter: crate::common::Mutex, -} - 
-#[pyo3::pymethods] -impl LFUCache { - #[new] - #[pyo3(signature=(maxsize, *, capacity=0, maxmemory=0))] - fn __new__(maxsize: usize, capacity: usize, maxmemory: usize) -> pyo3::PyResult { - let raw = crate::policies::lfu::LFUPolicy::new(maxsize, capacity, maxmemory)?; - - let self_ = Self { - raw: crate::common::Mutex::new(raw), - }; - Ok(self_) - } - - fn _state(&self) -> u16 { - self.raw.lock().observed.get() - } - - fn maxsize(&self) -> usize { - self.raw.lock().maxsize() - } - - fn maxmemory(&self) -> usize { - self.raw.lock().maxmemory() - } - - fn memory(&self) -> usize { - self.raw.lock().memory() - } - - fn capacity(&self) -> usize { - self.raw.lock().capacity() - } - - fn __len__(&self) -> usize { - self.raw.lock().len() - } - - fn __sizeof__(&self) -> usize { - let lock = self.raw.lock(); - - lock.capacity() - * (size_of::() + size_of::() + size_of::()) - } - - fn __contains__( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.lookup(py, &key)? { - Some(_) => Ok(true), - None => Ok(false), - } - } - - fn is_empty(&self) -> bool { - self.raw.lock().is_empty() - } - - fn is_full(&self) -> bool { - self.raw.lock().is_full() - } - - #[pyo3(signature=(key, value, freq=0usize))] - fn insert( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - value: pyo3::Py, - freq: usize, - ) -> pyo3::PyResult>> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry_with_slot(py, &key)? { - Entry::Occupied(entry) => Ok(Some(entry.update(py, value)?)), - Entry::Absent(entry) => { - entry.insert(py, key, value, freq)?; - Ok(None) - } - } - } - - fn get( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.lookup(py, &key)? 
{ - Some(val) => Ok(val.clone_ref(py)), - None => Err(pyo3::PyErr::new::(key.obj)), - } - } - - fn peek( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult> { - let key = PreHashObject::from_pyobject(py, key)?; - let lock = self.raw.lock(); - - match lock.peek(py, &key)? { - Some(val) => Ok(val.clone_ref(py)), - None => Err(pyo3::PyErr::new::(key.obj)), - } - } - - fn update( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - iterable: pyo3::Py, - ) -> pyo3::PyResult<()> { - if slf.as_ptr() == iterable.as_ptr() { - return Ok(()); - } - - let mut lock = slf.raw.lock(); - lock.extend(py, iterable) - } - - fn __richcmp__( - slf: pyo3::PyRef<'_, Self>, - other: pyo3::Py, - op: pyo3::class::basic::CompareOp, - ) -> pyo3::PyResult { - let other = other.extract::>(slf.py())?; - - match op { - pyo3::class::basic::CompareOp::Eq => { - if slf.as_ptr() == other.as_ptr() { - return Ok(true); - } - - let t1 = slf.raw.lock(); - let t2 = other.raw.lock(); - t1.equal(slf.py(), &t2) - } - pyo3::class::basic::CompareOp::Ne => { - if slf.as_ptr() == other.as_ptr() { - return Ok(false); - } - - let t1 = slf.raw.lock(); - let t2 = other.raw.lock(); - t1.equal(slf.py(), &t2).map(|r| !r) - } - _ => Err(pyo3::PyErr::new::( - "only '==' or '!=' are supported", - )), - } - } - - fn remove( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry(py, &key)? 
{ - Entry::Occupied(entry) => { - let (_, value, _, _) = entry.remove(); - Ok(value) - } - Entry::Absent(_) => Err(pyo3::PyErr::new::(key.obj)), - } - } - - fn popitem(&self) -> pyo3::PyResult<(pyo3::Py, pyo3::Py)> { - let mut lock = self.raw.lock(); - - match lock.popitem() { - Some((key, val, _, _)) => Ok((key.obj, val)), - None => Err(pyo3::PyErr::new::(())), - } - } - - fn clear(&self, reuse: bool) { - let mut lock = self.raw.lock(); - lock.clear(); - - if !reuse { - lock.shrink_to_fit(); - } - } - - fn shrink_to_fit(&self) { - let mut lock = self.raw.lock(); - lock.shrink_to_fit(); - } - - #[pyo3(signature=(key, default, freq=0usize))] - fn setdefault( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - default: pyo3::Py, - freq: usize, - ) -> pyo3::PyResult> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry(py, &key)? { - Entry::Occupied(entry) => { - let node = entry.into_value(); - Ok(unsafe { node.as_ref().1.clone_ref(py) }) - } - Entry::Absent(entry) => { - entry.insert(py, key, default.clone_ref(py), freq)?; - Ok(default) - } - } - } - - fn items(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult> { - let mut lock = slf.raw.lock(); - let state = lock.observed.get(); - let iter = lock.iter(); - - let result = lfucache_items { - ptr: ObservedIterator::new(slf.as_ptr(), state), - iter: crate::common::Mutex::new(iter), - }; - - pyo3::Py::new(slf.py(), result) - } - - pub fn least_frequently_used( - &self, - py: pyo3::Python<'_>, - n: usize, - ) -> Option> { - let mut lock = self.raw.lock(); - lock.least_frequently_used(n) - .map(|x| unsafe { x.as_ref().0.obj.clone_ref(py) }) - } - - fn __getnewargs__(&self) -> (usize,) { - (0,) - } - - fn __getstate__(&self, py: pyo3::Python<'_>) -> pyo3::PyResult> { - let mut lock = self.raw.lock(); - - let state = unsafe { - let list = pyo3::ffi::PyList_New(0); - if list.is_null() { - return Err(pyo3::PyErr::fetch(py)); - } - - for ptr in lock.iter() { - let node = 
&(*ptr.as_ptr()); - - let frequency = pyo3::ffi::PyLong_FromSize_t(node.2); - if frequency.is_null() { - pyo3::ffi::Py_DECREF(list); - return Err(pyo3::PyErr::fetch(py)); - } - - let tp = tuple!( - py, - 3, - 0 => node.0.obj.clone_ref(py).into_ptr(), - 1 => node.1.clone_ref(py).into_ptr(), - 2 => frequency, - ); - - if let Err(x) = tp { - pyo3::ffi::Py_DECREF(list); - return Err(x); - } - - if pyo3::ffi::PyList_Append(list, tp.unwrap_unchecked()) == -1 { - pyo3::ffi::Py_DECREF(list); - return Err(pyo3::PyErr::fetch(py)); - } - } - - let maxsize = pyo3::ffi::PyLong_FromSize_t(lock.maxsize()); - let capacity = pyo3::ffi::PyLong_FromSize_t(lock.capacity()); - let maxmemory = pyo3::ffi::PyLong_FromSize_t(lock.maxmemory()); - - tuple!( - py, - 4, - 0 => maxsize, - 1 => list, - 2 => capacity, - 3 => maxmemory, - )? - }; - - Ok(unsafe { pyo3::Py::from_owned_ptr(py, state) }) - } - - pub fn __setstate__( - &self, - py: pyo3::Python<'_>, - state: pyo3::Py, - ) -> pyo3::PyResult<()> { - let mut lock = self.raw.lock(); - lock.from_pickle(py, state.as_ptr()) - } - - pub fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { - for node in self.raw.lock().iter() { - let value = unsafe { node.as_ref() }; - - visit.call(&value.0.obj)?; - visit.call(&value.1)?; - } - Ok(()) - } - - pub fn __clear__(&self) { - let mut lock = self.raw.lock(); - lock.clear() - } -} - -#[pyo3::pymethods] -impl lfucache_items { - fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { - slf - } - - #[allow(unused_mut)] - fn __next__(mut slf: pyo3::PyRefMut<'_, Self>) -> pyo3::PyResult<*mut pyo3::ffi::PyObject> { - let mut iter = slf.iter.lock(); - - slf.ptr.proceed(slf.py())?; - - if let Some(x) = iter.next() { - let (key, val, freq, _) = unsafe { x.as_ref() }; - - let freq = unsafe { pyo3::ffi::PyLong_FromSize_t(*freq) }; - - tuple!( - slf.py(), - 3, - 0 => key.obj.clone_ref(slf.py()).into_ptr(), - 1 => val.clone_ref(slf.py()).into_ptr(), - 2 => freq, - ) - } 
else { - Err(pyo3::PyErr::new::(())) - } - } -} diff --git a/src/bridge/lrucache.rs b/src/bridge/lrucache.rs deleted file mode 100644 index 761edad..0000000 --- a/src/bridge/lrucache.rs +++ /dev/null @@ -1,363 +0,0 @@ -use crate::common::Entry; -use crate::common::ObservedIterator; -use crate::common::PreHashObject; - -#[cfg_attr(Py_3_9, pyo3::pyclass(module = "cachebox._core", frozen))] -#[cfg_attr( - not(Py_3_9), - pyo3::pyclass(module = "cachebox._core", frozen, immutable_type) -)] -pub struct LRUCache { - raw: crate::common::Mutex, -} - -#[allow(non_camel_case_types)] -#[pyo3::pyclass(module = "cachebox._core")] -pub struct lrucache_items { - pub ptr: ObservedIterator, - pub iter: crate::common::Mutex, -} - -#[pyo3::pymethods] -impl LRUCache { - #[new] - #[pyo3(signature=(maxsize, *, capacity=0, maxmemory=0))] - fn __new__(maxsize: usize, capacity: usize, maxmemory: usize) -> pyo3::PyResult { - let raw = crate::policies::lru::LRUPolicy::new(maxsize, capacity, maxmemory)?; - - let self_ = Self { - raw: crate::common::Mutex::new(raw), - }; - Ok(self_) - } - - fn _state(&self) -> u16 { - self.raw.lock().observed.get() - } - - fn maxsize(&self) -> usize { - self.raw.lock().maxsize() - } - - fn maxmemory(&self) -> usize { - self.raw.lock().maxmemory() - } - - fn memory(&self) -> usize { - self.raw.lock().memory() - } - - fn capacity(&self) -> usize { - self.raw.lock().capacity() - } - - fn __len__(&self) -> usize { - self.raw.lock().len() - } - - fn __sizeof__(&self) -> usize { - let lock = self.raw.lock(); - - lock.capacity() - * (size_of::() + size_of::() + size_of::()) - } - - fn __contains__( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.lookup(py, &key)? 
{ - Some(_) => Ok(true), - None => Ok(false), - } - } - - fn is_empty(&self) -> bool { - self.raw.lock().is_empty() - } - - fn is_full(&self) -> bool { - self.raw.lock().is_full() - } - - fn insert( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - value: pyo3::Py, - ) -> pyo3::PyResult>> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry_with_slot(py, &key)? { - Entry::Occupied(entry) => Ok(Some(entry.update(py, value)?)), - Entry::Absent(entry) => { - entry.insert(py, key, value)?; - Ok(None) - } - } - } - - fn get( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.lookup(py, &key)? { - Some(val) => Ok(val.clone_ref(py)), - None => Err(pyo3::PyErr::new::(key.obj)), - } - } - - fn peek( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult> { - let key = PreHashObject::from_pyobject(py, key)?; - let lock = self.raw.lock(); - - match lock.peek(py, &key)? 
{ - Some(val) => Ok(val.clone_ref(py)), - None => Err(pyo3::PyErr::new::(key.obj)), - } - } - - fn update( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - iterable: pyo3::Py, - ) -> pyo3::PyResult<()> { - if slf.as_ptr() == iterable.as_ptr() { - return Ok(()); - } - - let mut lock = slf.raw.lock(); - lock.extend(py, iterable) - } - - fn __richcmp__( - slf: pyo3::PyRef<'_, Self>, - other: pyo3::Py, - op: pyo3::class::basic::CompareOp, - ) -> pyo3::PyResult { - let other = other.extract::>(slf.py())?; - - match op { - pyo3::class::basic::CompareOp::Eq => { - if slf.as_ptr() == other.as_ptr() { - return Ok(true); - } - - let t1 = slf.raw.lock(); - let t2 = other.raw.lock(); - t1.equal(slf.py(), &t2) - } - pyo3::class::basic::CompareOp::Ne => { - if slf.as_ptr() == other.as_ptr() { - return Ok(false); - } - - let t1 = slf.raw.lock(); - let t2 = other.raw.lock(); - t1.equal(slf.py(), &t2).map(|r| !r) - } - _ => Err(pyo3::PyErr::new::( - "only '==' or '!=' are supported", - )), - } - } - - fn remove( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry(py, &key)? 
{ - Entry::Occupied(entry) => { - let (_, value, _) = entry.remove(); - Ok(value) - } - Entry::Absent(_) => Err(pyo3::PyErr::new::(key.obj)), - } - } - - fn popitem(&self) -> pyo3::PyResult<(pyo3::Py, pyo3::Py)> { - let mut lock = self.raw.lock(); - - match lock.popitem() { - Some((key, val, _)) => Ok((key.obj, val)), - None => Err(pyo3::PyErr::new::(())), - } - } - - fn clear(&self, reuse: bool) { - let mut lock = self.raw.lock(); - lock.clear(); - - if !reuse { - lock.shrink_to_fit(); - } - } - - fn shrink_to_fit(&self) { - let mut lock = self.raw.lock(); - lock.shrink_to_fit(); - } - - fn setdefault( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - default: pyo3::Py, - ) -> pyo3::PyResult> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry(py, &key)? { - Entry::Occupied(entry) => { - let (_, ref value, _) = entry.into_value(); - Ok(value.clone_ref(py)) - } - Entry::Absent(entry) => { - entry.insert(py, key, default.clone_ref(py))?; - Ok(default) - } - } - } - - fn items(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult> { - let lock = slf.raw.lock(); - let state = lock.observed.get(); - let iter = lock.iter(); - - let result = lrucache_items { - ptr: ObservedIterator::new(slf.as_ptr(), state), - iter: crate::common::Mutex::new(iter), - }; - - pyo3::Py::new(slf.py(), result) - } - - fn least_recently_used(&self, py: pyo3::Python<'_>) -> Option> { - let lock = self.raw.lock(); - lock.least_recently_used().map(|x| x.0.obj.clone_ref(py)) - } - - fn most_recently_used(&self, py: pyo3::Python<'_>) -> Option> { - let lock = self.raw.lock(); - lock.most_recently_used().map(|x| x.0.obj.clone_ref(py)) - } - - fn __getnewargs__(&self) -> (usize,) { - (0,) - } - - fn __getstate__(&self, py: pyo3::Python<'_>) -> pyo3::PyResult> { - let lock = self.raw.lock(); - - let state = unsafe { - let list = pyo3::ffi::PyList_New(0); - if list.is_null() { - return Err(pyo3::PyErr::fetch(py)); - } - - for node in lock.iter() { - 
let (hk, val, _) = &(*node.as_ptr()).element; - - let tp = tuple!( - py, - 2, - 0 => hk.obj.clone_ref(py).as_ptr(), - 1 => val.clone_ref(py).as_ptr(), - ); - - if let Err(x) = tp { - pyo3::ffi::Py_DECREF(list); - return Err(x); - } - - if pyo3::ffi::PyList_Append(list, tp.unwrap_unchecked()) == -1 { - pyo3::ffi::Py_DECREF(list); - return Err(pyo3::PyErr::fetch(py)); - } - } - - let maxsize = pyo3::ffi::PyLong_FromSize_t(lock.maxsize()); - let capacity = pyo3::ffi::PyLong_FromSize_t(lock.capacity()); - let maxmemory = pyo3::ffi::PyLong_FromSize_t(lock.maxmemory()); - - tuple!( - py, - 4, - 0 => maxsize, - 1 => list, - 2 => capacity, - 3 => maxmemory, - )? - }; - - Ok(unsafe { pyo3::Py::from_owned_ptr(py, state) }) - } - - pub fn __setstate__( - &self, - py: pyo3::Python<'_>, - state: pyo3::Py, - ) -> pyo3::PyResult<()> { - let mut lock = self.raw.lock(); - lock.from_pickle(py, state.as_ptr()) - } - - pub fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { - for node in self.raw.lock().iter() { - let value = unsafe { node.as_ref() }; - - visit.call(&value.element.0.obj)?; - visit.call(&value.element.1)?; - } - Ok(()) - } - - pub fn __clear__(&self) { - let mut lock = self.raw.lock(); - lock.clear() - } -} - -#[pyo3::pymethods] -impl lrucache_items { - fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { - slf - } - - #[allow(unused_mut)] - fn __next__(mut slf: pyo3::PyRefMut<'_, Self>) -> pyo3::PyResult<*mut pyo3::ffi::PyObject> { - let mut iter = slf.iter.lock(); - - slf.ptr.proceed(slf.py())?; - - if let Some(x) = iter.next() { - let (key, val, _) = unsafe { &x.as_ref().element }; - - tuple!( - slf.py(), - 2, - 0 => key.obj.clone_ref(slf.py()).into_ptr(), - 1 => val.clone_ref(slf.py()).into_ptr(), - ) - } else { - Err(pyo3::PyErr::new::(())) - } - } -} diff --git a/src/bridge/mod.rs b/src/bridge/mod.rs deleted file mode 100644 index 40a7c3b..0000000 --- a/src/bridge/mod.rs +++ /dev/null @@ -1,111 +0,0 @@ -use 
pyo3::create_exception; -use pyo3::types::PyTypeMethods; - -create_exception!(cachebox._core, CoreKeyError, pyo3::exceptions::PyException); - -#[cfg_attr(Py_3_9, pyo3::pyclass(module = "cachebox._core", frozen, subclass))] -#[cfg_attr( - not(Py_3_9), - pyo3::pyclass(module = "cachebox._core", frozen, immutable_type, subclass) -)] -pub struct BaseCacheImpl {} - -#[pyo3::pymethods] -impl BaseCacheImpl { - #[new] - #[pyo3(signature = (*args, **kwargs))] - #[classmethod] - #[allow(unused_variables)] - pub fn __new__( - cls: &pyo3::Bound<'_, pyo3::types::PyType>, - args: &pyo3::Bound<'_, pyo3::PyAny>, - kwargs: Option<&pyo3::Bound<'_, pyo3::PyAny>>, - ) -> pyo3::PyResult { - let size = unsafe { pyo3::ffi::PyTuple_Size(cls.mro().as_ptr()) }; - - // This means BaseCacheImpl is used as subclass - // So we shouldn't raise NotImplementedError - if size > 2 { - Ok(Self {}) - } else { - Err(pyo3::PyErr::new::("do not call this constructor, you can subclass this implementation or use other classes.")) - } - } - - #[allow(unused_variables)] - #[classmethod] - pub fn __class_getitem__( - cls: &pyo3::Bound<'_, pyo3::types::PyType>, - args: pyo3::Py, - ) -> pyo3::Py { - cls.clone().into() - } -} - -#[cfg_attr(Py_3_9, pyo3::pyclass(module = "cachebox._core", frozen))] -#[cfg_attr( - not(Py_3_9), - pyo3::pyclass(module = "cachebox._core", frozen, immutable_type) -)] -pub struct TTLPair { - key: pyo3::Py, - value: pyo3::Py, - duration: std::time::Duration, -} - -impl TTLPair { - fn clone_from_pair(py: pyo3::Python<'_>, pair: &crate::common::TimeToLivePair) -> Self { - TTLPair { - key: pair.key.obj.clone_ref(py), - value: pair.value.clone_ref(py), - duration: pair.duration().unwrap_or_default(), - } - } -} - -impl From for TTLPair { - fn from(value: crate::common::TimeToLivePair) -> Self { - let duration = value.duration().unwrap_or_default(); - - TTLPair { - key: value.key.obj, - value: value.value, - duration, - } - } -} - -#[pyo3::pymethods] -impl TTLPair { - fn key(slf: 
pyo3::PyRef<'_, Self>) -> pyo3::Py { - slf.key.clone_ref(slf.py()) - } - - fn value(slf: pyo3::PyRef<'_, Self>) -> pyo3::Py { - slf.value.clone_ref(slf.py()) - } - - fn duration(slf: pyo3::PyRef<'_, Self>) -> f64 { - slf.duration.as_secs_f64() - } - - fn pack2(slf: pyo3::PyRef<'_, Self>) -> (pyo3::Py, pyo3::Py) { - (slf.key.clone_ref(slf.py()), slf.value.clone_ref(slf.py())) - } - - fn pack3(slf: pyo3::PyRef<'_, Self>) -> (pyo3::Py, pyo3::Py, f64) { - ( - slf.key.clone_ref(slf.py()), - slf.value.clone_ref(slf.py()), - slf.duration.as_secs_f64(), - ) - } -} - -pub mod cache; -pub mod fifocache; -pub mod lfucache; -pub mod lrucache; -pub mod rrcache; -pub mod ttlcache; -pub mod vttlcache; diff --git a/src/bridge/rrcache.rs b/src/bridge/rrcache.rs deleted file mode 100644 index a3777c8..0000000 --- a/src/bridge/rrcache.rs +++ /dev/null @@ -1,297 +0,0 @@ -use super::cache::cache_items; -use crate::common::Entry; -use crate::common::ObservedIterator; -use crate::common::PreHashObject; - -#[cfg_attr(Py_3_9, pyo3::pyclass(module = "cachebox._core", frozen))] -#[cfg_attr( - not(Py_3_9), - pyo3::pyclass(module = "cachebox._core", frozen, immutable_type) -)] -pub struct RRCache { - raw: crate::common::Mutex, -} - -#[pyo3::pymethods] -impl RRCache { - #[new] - #[pyo3(signature=(maxsize, *, capacity=0, maxmemory=0))] - fn __new__(maxsize: usize, capacity: usize, maxmemory: usize) -> pyo3::PyResult { - let raw = crate::policies::random::RandomPolicy::new(maxsize, capacity, maxmemory)?; - - let self_ = Self { - raw: crate::common::Mutex::new(raw), - }; - Ok(self_) - } - - fn _state(&self) -> usize { - self.raw.lock().observed.get() as usize - } - - fn maxsize(&self) -> usize { - self.raw.lock().maxsize() - } - - fn maxmemory(&self) -> usize { - self.raw.lock().maxmemory() - } - - fn memory(&self) -> usize { - self.raw.lock().memory() - } - - fn capacity(&self) -> usize { - self.raw.lock().capacity() - } - - fn __len__(&self) -> usize { - self.raw.lock().len() - } - - fn 
__sizeof__(&self) -> usize { - let lock = self.raw.lock(); - lock.capacity() - * (size_of::() + size_of::() + size_of::()) - } - - fn __contains__( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult { - let key = PreHashObject::from_pyobject(py, key)?; - let lock = self.raw.lock(); - - match lock.lookup(py, &key)? { - Some(_) => Ok(true), - None => Ok(false), - } - } - - fn is_empty(&self) -> bool { - self.raw.lock().is_empty() - } - - fn is_full(&self) -> bool { - self.raw.lock().is_full() - } - - fn insert( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - value: pyo3::Py, - ) -> pyo3::PyResult>> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry_with_slot(py, &key)? { - Entry::Occupied(entry) => Ok(Some(entry.update(py, value)?)), - Entry::Absent(entry) => { - entry.insert(py, key, value)?; - Ok(None) - } - } - } - - fn get( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult> { - let key = PreHashObject::from_pyobject(py, key)?; - let lock = self.raw.lock(); - - match lock.lookup(py, &key)? 
{ - Some(val) => Ok(val.clone_ref(py)), - None => Err(pyo3::PyErr::new::(key.obj)), - } - } - - fn update( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - iterable: pyo3::Py, - ) -> pyo3::PyResult<()> { - if slf.as_ptr() == iterable.as_ptr() { - return Ok(()); - } - - let mut lock = slf.raw.lock(); - lock.extend(py, iterable) - } - - fn __richcmp__( - slf: pyo3::PyRef<'_, Self>, - other: pyo3::Py, - op: pyo3::class::basic::CompareOp, - ) -> pyo3::PyResult { - let other = other.extract::>(slf.py())?; - - match op { - pyo3::class::basic::CompareOp::Eq => { - if slf.as_ptr() == other.as_ptr() { - return Ok(true); - } - let t1 = slf.raw.lock(); - let t2 = other.raw.lock(); - t1.equal(slf.py(), &t2) - } - pyo3::class::basic::CompareOp::Ne => { - if slf.as_ptr() == other.as_ptr() { - return Ok(false); - } - - let t1 = slf.raw.lock(); - let t2 = other.raw.lock(); - t1.equal(slf.py(), &t2).map(|r| !r) - } - _ => Err(pyo3::PyErr::new::( - "only '==' or '!=' are supported", - )), - } - } - - fn remove( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry(py, &key)? { - Entry::Occupied(entry) => { - let (_, value, _) = entry.remove(); - Ok(value) - } - Entry::Absent(_) => Err(pyo3::PyErr::new::(key.obj)), - } - } - - fn popitem(&self) -> pyo3::PyResult<(pyo3::Py, pyo3::Py)> { - let mut lock = self.raw.lock(); - - match lock.popitem()? 
{ - Some((key, val, _)) => Ok((key.obj, val)), - None => Err(pyo3::PyErr::new::(())), - } - } - - fn clear(&self, reuse: bool) { - let mut lock = self.raw.lock(); - lock.clear(); - - if !reuse { - lock.shrink_to_fit(); - } - } - - fn shrink_to_fit(&self) { - let mut lock = self.raw.lock(); - lock.shrink_to_fit(); - } - - fn setdefault( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - default: pyo3::Py, - ) -> pyo3::PyResult> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry(py, &key)? { - Entry::Occupied(entry) => { - let (_, ref value, _) = entry.into_value(); - Ok(value.clone_ref(py)) - } - Entry::Absent(entry) => { - entry.insert(py, key, default.clone_ref(py))?; - Ok(default) - } - } - } - - fn items(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult> { - let lock = slf.raw.lock(); - let state = lock.observed.get(); - let iter = lock.iter(); - - let result = cache_items { - ptr: ObservedIterator::new(slf.as_ptr(), state), - iter: crate::common::Mutex::new(iter), - }; - - pyo3::Py::new(slf.py(), result) - } - - fn random_key(&self, py: pyo3::Python<'_>) -> pyo3::PyResult> { - let lock = self.raw.lock(); - match lock.random_key() { - Some(x) => Ok(x.obj.clone_ref(py)), - None => Err(pyo3::PyErr::new::(())), - } - } - - fn __getnewargs__(&self) -> (usize,) { - (0,) - } - - fn __getstate__(&self, py: pyo3::Python<'_>) -> pyo3::PyResult> { - let lock = self.raw.lock(); - unsafe { - let state = { - let mp = pyo3::ffi::PyDict_New(); - - if mp.is_null() { - return Err(pyo3::PyErr::fetch(py)); - } - - for bucket in lock.iter() { - let (key, val, _) = bucket.as_ref(); - // SAFETY: we don't need to check error because we sure about key that is hashable. 
- pyo3::ffi::PyDict_SetItem(mp, key.obj.as_ptr(), val.as_ptr()); - } - - let maxsize = pyo3::ffi::PyLong_FromSize_t(lock.maxsize()); - let capacity = pyo3::ffi::PyLong_FromSize_t(lock.capacity()); - let maxmemory = pyo3::ffi::PyLong_FromSize_t(lock.maxmemory()); - - tuple!( - py, - 4, - 0 => maxsize, - 1 => mp, - 2 => capacity, - 3 => maxmemory, - )? - }; - Ok(pyo3::Py::from_owned_ptr(py, state)) - } - } - - pub fn __setstate__( - &self, - py: pyo3::Python<'_>, - state: pyo3::Py, - ) -> pyo3::PyResult<()> { - let mut lock = self.raw.lock(); - lock.from_pickle(py, state.as_ptr()) - } - - pub fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { - for value in self.raw.lock().iter() { - let (key, value, _) = unsafe { value.as_ref() }; - visit.call(&key.obj)?; - visit.call(value)?; - } - Ok(()) - } - - pub fn __clear__(&self) { - let mut lock = self.raw.lock(); - lock.clear() - } -} diff --git a/src/bridge/ttlcache.rs b/src/bridge/ttlcache.rs deleted file mode 100644 index 979205f..0000000 --- a/src/bridge/ttlcache.rs +++ /dev/null @@ -1,375 +0,0 @@ -use crate::common::Entry; -use crate::common::ObservedIterator; -use crate::common::PreHashObject; -use crate::common::TimeToLivePair; - -#[cfg_attr(Py_3_9, pyo3::pyclass(module = "cachebox._core", frozen))] -#[cfg_attr( - not(Py_3_9), - pyo3::pyclass(module = "cachebox._core", frozen, immutable_type) -)] -pub struct TTLCache { - raw: crate::common::Mutex, -} - -#[allow(non_camel_case_types)] -#[cfg_attr(Py_3_9, pyo3::pyclass(module = "cachebox._core"))] -#[cfg_attr(not(Py_3_9), pyo3::pyclass(module = "cachebox._core", immutable_type))] -pub struct ttlcache_items { - pub ptr: ObservedIterator, - pub iter: crate::common::Mutex, - pub now: std::time::SystemTime, -} - -#[pyo3::pymethods] -impl TTLCache { - #[new] - #[pyo3(signature=(maxsize, ttl, *, capacity=0, maxmemory=0))] - fn __new__( - maxsize: usize, - ttl: f64, - capacity: usize, - maxmemory: usize, - ) -> pyo3::PyResult { - let raw 
= crate::policies::ttl::TTLPolicy::new(maxsize, capacity, ttl, maxmemory)?; - - let self_ = Self { - raw: crate::common::Mutex::new(raw), - }; - Ok(self_) - } - - fn _state(&self) -> u16 { - self.raw.lock().observed.get() - } - - fn maxsize(&self) -> usize { - self.raw.lock().maxsize() - } - - fn maxmemory(&self) -> usize { - self.raw.lock().maxmemory() - } - - fn memory(&self) -> usize { - self.raw.lock().memory() - } - - fn ttl(&self) -> f64 { - self.raw.lock().ttl().as_secs_f64() - } - - fn capacity(&self) -> usize { - self.raw.lock().capacity().0 - } - - fn __len__(&self) -> usize { - self.raw.lock().real_len() - } - - fn __sizeof__(&self) -> usize { - let lock = self.raw.lock(); - let capacity = lock.capacity(); - - capacity.0 * size_of::() - + capacity.1 - * (size_of::() - + size_of::() - + size_of::()) - } - - fn __contains__( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult { - let key = PreHashObject::from_pyobject(py, key)?; - let lock = self.raw.lock(); - - match lock.lookup(py, &key)? { - Some(_) => Ok(true), - None => Ok(false), - } - } - - fn is_empty(&self) -> bool { - self.raw.lock().is_empty() - } - - fn is_full(&self) -> bool { - self.raw.lock().is_full() - } - - fn insert( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - value: pyo3::Py, - ) -> pyo3::PyResult>> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry_with_slot(py, &key)? { - Entry::Occupied(entry) => Ok(Some(entry.update(py, value)?)), - Entry::Absent(entry) => { - entry.insert(py, key, value)?; - Ok(None) - } - } - } - - fn get( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult { - let key = PreHashObject::from_pyobject(py, key)?; - let lock = self.raw.lock(); - - match lock.lookup(py, &key)? 
{ - Some(val) => Ok(super::TTLPair::clone_from_pair(py, val)), - None => Err(pyo3::PyErr::new::(key.obj)), - } - } - - fn update( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - iterable: pyo3::Py, - ) -> pyo3::PyResult<()> { - if slf.as_ptr() == iterable.as_ptr() { - return Ok(()); - } - - let mut lock = slf.raw.lock(); - lock.extend(py, iterable) - } - - fn __richcmp__( - slf: pyo3::PyRef<'_, Self>, - other: pyo3::Py, - op: pyo3::class::basic::CompareOp, - ) -> pyo3::PyResult { - let other = other.extract::>(slf.py())?; - - match op { - pyo3::class::basic::CompareOp::Eq => { - if slf.as_ptr() == other.as_ptr() { - return Ok(true); - } - - let t1 = slf.raw.lock(); - let t2 = other.raw.lock(); - t1.equal(slf.py(), &t2) - } - pyo3::class::basic::CompareOp::Ne => { - if slf.as_ptr() == other.as_ptr() { - return Ok(false); - } - - let t1 = slf.raw.lock(); - let t2 = other.raw.lock(); - t1.equal(slf.py(), &t2).map(|r| !r) - } - _ => Err(pyo3::PyErr::new::( - "only '==' or '!=' are supported", - )), - } - } - - fn remove( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry(py, &key)? { - Entry::Occupied(entry) => { - let val = entry.remove(); - Ok(super::TTLPair::from(val)) - } - Entry::Absent(_) => Err(pyo3::PyErr::new::(key.obj)), - } - } - - fn popitem(&self, py: pyo3::Python<'_>) -> pyo3::PyResult { - let mut lock = self.raw.lock(); - - match lock.popitem(py)? 
{ - Some(val) => Ok(super::TTLPair::from(val)), - None => Err(pyo3::PyErr::new::(())), - } - } - - fn clear(&self, py: pyo3::Python<'_>, reuse: bool) { - let mut lock = self.raw.lock(); - lock.clear(); - - if !reuse { - lock.shrink_to_fit(py); - } - } - - fn shrink_to_fit(&self, py: pyo3::Python<'_>) { - let mut lock = self.raw.lock(); - lock.shrink_to_fit(py); - } - - fn setdefault( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - default: pyo3::Py, - ) -> pyo3::PyResult> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry(py, &key)? { - Entry::Occupied(entry) => { - let val = entry.into_value(); - Ok(val.value.clone_ref(py)) - } - Entry::Absent(entry) => { - entry.insert(py, key, default.clone_ref(py))?; - Ok(default) - } - } - } - - fn items(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult> { - let mut lock = slf.raw.lock(); - let state = lock.observed.get(); - let iter = lock.iter(slf.py()); - - let result = ttlcache_items { - ptr: ObservedIterator::new(slf.as_ptr(), state), - iter: crate::common::Mutex::new(iter), - now: std::time::SystemTime::now(), - }; - - pyo3::Py::new(slf.py(), result) - } - - fn get_index(&self, py: pyo3::Python<'_>, index: usize) -> Option> { - let lock = self.raw.lock(); - lock.get_index(index).map(|pair| pair.key.obj.clone_ref(py)) - } - - fn expire(&self, py: pyo3::Python<'_>) { - let mut lock = self.raw.lock(); - lock.expire(py); - lock.shrink_to_fit(py); - } - - fn __getnewargs__(&self) -> (usize, f64) { - (0, 0.0f64) - } - - fn __getstate__(&self, py: pyo3::Python<'_>) -> pyo3::PyResult> { - let lock = self.raw.lock(); - - let state = unsafe { - let list = pyo3::ffi::PyList_New(0); - if list.is_null() { - return Err(pyo3::PyErr::fetch(py)); - } - - for element in lock.entries_iter() { - let tp = tuple!( - py, - 3, - 0 => element.key.obj.clone_ref(py).as_ptr(), - 1 => element.value.clone_ref(py).as_ptr(), - 2 => pyo3::ffi::PyFloat_FromDouble( - 
element.expire_at.unwrap_unchecked() - .duration_since(std::time::UNIX_EPOCH).unwrap_unchecked().as_secs_f64() - ), - ); - - if let Err(x) = tp { - pyo3::ffi::Py_DECREF(list); - return Err(x); - } - - if pyo3::ffi::PyList_Append(list, tp.unwrap_unchecked()) == -1 { - pyo3::ffi::Py_DECREF(list); - return Err(pyo3::PyErr::fetch(py)); - } - } - - let maxsize = pyo3::ffi::PyLong_FromSize_t(lock.maxsize()); - let capacity = pyo3::ffi::PyLong_FromSize_t(lock.capacity().0); - let ttl = pyo3::ffi::PyFloat_FromDouble(lock.ttl().as_secs_f64()); - let maxmemory = pyo3::ffi::PyLong_FromSize_t(lock.maxmemory()); - - tuple!( - py, - 5, - 0 => maxsize, - 1 => list, - 2 => capacity, - 3 => ttl, - 4 => maxmemory, - )? - }; - - Ok(unsafe { pyo3::Py::from_owned_ptr(py, state) }) - } - - pub fn __setstate__( - &self, - py: pyo3::Python<'_>, - state: pyo3::Py, - ) -> pyo3::PyResult<()> { - let mut lock = self.raw.lock(); - lock.from_pickle(py, state.as_ptr()) - } - - pub fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { - for value in self.raw.lock().entries_iter() { - visit.call(&value.key.obj)?; - visit.call(&value.value)?; - } - Ok(()) - } - - pub fn __clear__(&self) { - let mut lock = self.raw.lock(); - lock.clear() - } -} - -#[pyo3::pymethods] -impl ttlcache_items { - fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { - slf - } - - #[allow(unused_mut)] - fn __next__(mut slf: pyo3::PyRefMut<'_, Self>) -> pyo3::PyResult { - let mut iter = slf.iter.lock(); - - slf.ptr.proceed(slf.py())?; - - let mut element: std::ptr::NonNull; - loop { - element = { - if let Some(x) = iter.next() { - x - } else { - return Err(pyo3::PyErr::new::(())); - } - }; - - if unsafe { !element.as_ref().is_expired(slf.now) } { - break; - } - } - - Ok(super::TTLPair::clone_from_pair(slf.py(), unsafe { - element.as_ref() - })) - } -} diff --git a/src/bridge/vttlcache.rs b/src/bridge/vttlcache.rs deleted file mode 100644 index e815358..0000000 --- 
a/src/bridge/vttlcache.rs +++ /dev/null @@ -1,373 +0,0 @@ -use crate::common::Entry; -use crate::common::ObservedIterator; -use crate::common::PreHashObject; -use crate::common::TimeToLivePair; - -#[cfg_attr(Py_3_9, pyo3::pyclass(module = "cachebox._core", frozen))] -#[cfg_attr( - not(Py_3_9), - pyo3::pyclass(module = "cachebox._core", frozen, immutable_type) -)] -pub struct VTTLCache { - raw: crate::common::Mutex, -} - -#[allow(non_camel_case_types)] -#[pyo3::pyclass(module = "cachebox._core")] -pub struct vttlcache_items { - pub ptr: ObservedIterator, - pub iter: crate::common::Mutex, - pub now: std::time::SystemTime, -} - -#[pyo3::pymethods] -impl VTTLCache { - #[new] - #[pyo3(signature=(maxsize, *, capacity=0, maxmemory=0))] - fn __new__(maxsize: usize, capacity: usize, maxmemory: usize) -> pyo3::PyResult { - let raw = crate::policies::vttl::VTTLPolicy::new(maxsize, capacity, maxmemory)?; - - let self_ = Self { - raw: crate::common::Mutex::new(raw), - }; - Ok(self_) - } - - fn _state(&self) -> u16 { - self.raw.lock().observed.get() - } - - fn maxsize(&self) -> usize { - self.raw.lock().maxsize() - } - - fn maxmemory(&self) -> usize { - self.raw.lock().maxmemory() - } - - fn memory(&self) -> usize { - self.raw.lock().memory() - } - - fn capacity(&self) -> usize { - self.raw.lock().capacity() - } - - fn __len__(&self) -> usize { - self.raw.lock().real_len() - } - - fn __sizeof__(&self) -> usize { - let lock = self.raw.lock(); - - lock.capacity() - * (size_of::() + size_of::() + size_of::()) - } - - fn __contains__( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult { - let key = PreHashObject::from_pyobject(py, key)?; - let lock = self.raw.lock(); - - match lock.lookup(py, &key)? 
{ - Some(_) => Ok(true), - None => Ok(false), - } - } - - fn is_empty(&self) -> bool { - self.raw.lock().is_empty() - } - - fn is_full(&self) -> bool { - self.raw.lock().is_full() - } - - #[pyo3(signature=(key, value, ttl=None))] - fn insert( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - value: pyo3::Py, - ttl: Option, - ) -> pyo3::PyResult>> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry_with_slot(py, &key)? { - Entry::Occupied(entry) => Ok(Some(entry.update(py, value, ttl)?)), - Entry::Absent(entry) => { - entry.insert(py, key, value, ttl)?; - Ok(None) - } - } - } - - fn get( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult { - let key = PreHashObject::from_pyobject(py, key)?; - let lock = self.raw.lock(); - - match lock.lookup(py, &key)? { - Some(val) => Ok(super::TTLPair::clone_from_pair(py, val)), - None => Err(pyo3::PyErr::new::(key.obj)), - } - } - - #[pyo3(signature=(iterable, ttl=None))] - fn update( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - iterable: pyo3::Py, - ttl: Option, - ) -> pyo3::PyResult<()> { - if slf.as_ptr() == iterable.as_ptr() { - return Ok(()); - } - - let mut lock = slf.raw.lock(); - lock.extend(py, iterable, ttl) - } - - fn __richcmp__( - slf: pyo3::PyRef<'_, Self>, - other: pyo3::Py, - op: pyo3::class::basic::CompareOp, - ) -> pyo3::PyResult { - let other = other.extract::>(slf.py())?; - - match op { - pyo3::class::basic::CompareOp::Eq => { - if slf.as_ptr() == other.as_ptr() { - return Ok(true); - } - - let mut t1 = slf.raw.lock(); - let mut t2 = other.raw.lock(); - t1.equal(slf.py(), &mut t2) - } - pyo3::class::basic::CompareOp::Ne => { - if slf.as_ptr() == other.as_ptr() { - return Ok(false); - } - - let mut t1 = slf.raw.lock(); - let mut t2 = other.raw.lock(); - t1.equal(slf.py(), &mut t2).map(|r| !r) - } - _ => Err(pyo3::PyErr::new::( - "only '==' or '!=' are supported", - )), - } - } - - fn remove( - &self, - py: 
pyo3::Python<'_>, - key: pyo3::Py, - ) -> pyo3::PyResult { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry(py, &key)? { - Entry::Occupied(entry) => { - let val = entry.remove(); - Ok(super::TTLPair::from(val)) - } - Entry::Absent(_) => Err(pyo3::PyErr::new::(key.obj)), - } - } - - fn popitem(&self) -> pyo3::PyResult { - let mut lock = self.raw.lock(); - - match lock.popitem() { - Some(val) => Ok(super::TTLPair::from(val)), - None => Err(pyo3::PyErr::new::(())), - } - } - - fn clear(&self, reuse: bool) { - let mut lock = self.raw.lock(); - lock.clear(); - - if !reuse { - lock.shrink_to_fit(); - } - } - - fn shrink_to_fit(&self) { - let mut lock = self.raw.lock(); - lock.shrink_to_fit(); - } - - #[pyo3(signature=(key, default, ttl=None))] - fn setdefault( - &self, - py: pyo3::Python<'_>, - key: pyo3::Py, - default: pyo3::Py, - ttl: Option, - ) -> pyo3::PyResult> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry(py, &key)? 
{ - Entry::Occupied(entry) => unsafe { - let val = entry.into_value(); - Ok(val.as_ref().value.clone_ref(py)) - }, - Entry::Absent(entry) => { - entry.insert(py, key, default.clone_ref(py), ttl)?; - Ok(default) - } - } - } - - fn items(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult> { - let mut lock = slf.raw.lock(); - let state = lock.observed.get(); - let iter = lock.iter(); - - let result = vttlcache_items { - ptr: ObservedIterator::new(slf.as_ptr(), state), - iter: crate::common::Mutex::new(iter), - now: std::time::SystemTime::now(), - }; - - pyo3::Py::new(slf.py(), result) - } - - fn expire(&self) { - let mut lock = self.raw.lock(); - lock.expire(); - lock.shrink_to_fit(); - } - - fn __getnewargs__(&self) -> (usize,) { - (0,) - } - - fn __getstate__(&self, py: pyo3::Python<'_>) -> pyo3::PyResult> { - let mut lock = self.raw.lock(); - lock.expire(); - - let state = unsafe { - let list = pyo3::ffi::PyList_New(0); - if list.is_null() { - return Err(pyo3::PyErr::fetch(py)); - } - - for ptr in lock.iter() { - let node = ptr.as_ref(); - - let ttlobject = pyo3::ffi::PyLong_FromDouble(node.expire_at.map_or(0.0, |x| { - x.duration_since(std::time::UNIX_EPOCH) - .unwrap() - .as_secs_f64() - })); - - if ttlobject.is_null() { - pyo3::ffi::Py_DECREF(list); - return Err(pyo3::PyErr::fetch(py)); - } - - let tp = tuple!( - py, - 3, - 0 => node.key.obj.clone_ref(py).as_ptr(), - 1 => node.value.clone_ref(py).as_ptr(), - 2 => ttlobject, - ); - - if let Err(x) = tp { - pyo3::ffi::Py_DECREF(list); - return Err(x); - } - - if pyo3::ffi::PyList_Append(list, tp.unwrap_unchecked()) == -1 { - pyo3::ffi::Py_DECREF(list); - return Err(pyo3::PyErr::fetch(py)); - } - } - - let maxsize = pyo3::ffi::PyLong_FromSize_t(lock.maxsize()); - let capacity = pyo3::ffi::PyLong_FromSize_t(lock.capacity()); - let maxmemory = pyo3::ffi::PyLong_FromSize_t(lock.maxmemory()); - - tuple!( - py, - 4, - 0 => maxsize, - 1 => list, - 2 => capacity, - 3 => maxmemory, - )? 
- }; - - Ok(unsafe { pyo3::Py::from_owned_ptr(py, state) }) - } - - pub fn __setstate__( - &self, - py: pyo3::Python<'_>, - state: pyo3::Py, - ) -> pyo3::PyResult<()> { - let mut lock = self.raw.lock(); - lock.from_pickle(py, state.as_ptr()) - } - - pub fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { - for node in self.raw.lock().iter() { - let value = unsafe { node.as_ref() }; - - visit.call(&value.key.obj)?; - visit.call(&value.value)?; - } - Ok(()) - } - - pub fn __clear__(&self) { - let mut lock = self.raw.lock(); - lock.clear() - } -} - -#[pyo3::pymethods] -impl vttlcache_items { - fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { - slf - } - - #[allow(unused_mut)] - fn __next__(mut slf: pyo3::PyRefMut<'_, Self>) -> pyo3::PyResult { - let mut iter = slf.iter.lock(); - - slf.ptr.proceed(slf.py())?; - - let mut element: std::ptr::NonNull; - loop { - element = { - if let Some(x) = iter.next() { - x - } else { - return Err(pyo3::PyErr::new::(())); - } - }; - - if unsafe { !element.as_ref().is_expired(slf.now) } { - break; - } - } - - Ok(super::TTLPair::clone_from_pair(slf.py(), unsafe { - element.as_ref() - })) - } -} diff --git a/src/common.rs b/src/common.rs deleted file mode 100644 index 1065fff..0000000 --- a/src/common.rs +++ /dev/null @@ -1,594 +0,0 @@ -use pyo3::types::PyAnyMethods; - -macro_rules! non_zero_or { - ($num:expr, $_else:expr) => { - unsafe { core::num::NonZeroUsize::new_unchecked(if $num == 0 { $_else } else { $num }) } - }; -} - -macro_rules! new_table { - ($capacity:expr) => {{ - if $capacity > 0 { - hashbrown::raw::RawTable::try_with_capacity($capacity) - .map_err(|_| pyo3::PyErr::new::(())) - } else { - Ok(hashbrown::raw::RawTable::new()) - } - }}; -} - -macro_rules! 
tuple { - ( - $py:expr, - $len:expr, - $($index:expr => $value:expr,)+ - ) => {{ - #[allow(unused_unsafe)] - let tuple = unsafe { pyo3::ffi::PyTuple_New($len) }; - if tuple.is_null() { - Err(pyo3::PyErr::fetch($py)) - } else { - #[allow(unused_unsafe)] - unsafe { - $( - pyo3::ffi::PyTuple_SetItem(tuple, $index, $value); - )+ - } - - Ok(tuple) - } - }}; - - (check $tuple:expr, size=$size:expr) => {{ - #[allow(unused_unsafe)] - if unsafe { pyo3::ffi::PyTuple_CheckExact($tuple) } == 0 { - Err( - pyo3::PyErr::new::("expected tuple, but got another type") - ) - } else if unsafe {pyo3::ffi::PyTuple_Size($tuple)} != $size { - Err( - pyo3::PyErr::new::("tuple size is invalid") - ) - } else { - Ok(()) - } - }} -} - -macro_rules! extract_pickle_tuple { - ($py:expr, $state:expr => list) => {{ - if pyo3::ffi::PyTuple_CheckExact($state) == 0 { - return Err(pyo3::PyErr::new::( - "expected tuple, but got another type", - )); - } - - let size = pyo3::ffi::PyTuple_Size($state); - if size != 3 && size != 4 { - return Err(pyo3::PyErr::new::( - "tuple size is invalid", - )); - } - - let maxsize = { - let obj = pyo3::ffi::PyTuple_GetItem($state, 0); - pyo3::ffi::PyLong_AsSize_t(obj) - }; - - if let Some(e) = pyo3::PyErr::take($py) { - return Err(e); - } - - let iterable = { - let obj = pyo3::ffi::PyTuple_GetItem($state, 1); - - if pyo3::ffi::PyList_CheckExact(obj) != 1 { - return Err(pyo3::PyErr::new::( - "the iterable object is not an dict or list", - )); - } - - // Tuple returns borrowed reference - pyo3::Py::::from_borrowed_ptr($py, obj) - }; - - let capacity = { - let obj = pyo3::ffi::PyTuple_GetItem($state, 2); - pyo3::ffi::PyLong_AsSize_t(obj) - }; - - if let Some(e) = pyo3::PyErr::take($py) { - return Err(e); - } - - let maxmemory = if size == 4 { - let obj = pyo3::ffi::PyTuple_GetItem($state, 3); - let result = pyo3::ffi::PyLong_AsSize_t(obj); - - if let Some(e) = pyo3::PyErr::take($py) { - return Err(e); - } - - result - } else { - 0 - }; - - (maxsize, iterable, capacity, 
maxmemory) - }}; - - ($py:expr, $state:expr => dict) => {{ - if pyo3::ffi::PyTuple_CheckExact($state) == 0 { - return Err(pyo3::PyErr::new::( - "expected tuple, but got another type", - )); - } - - let size = pyo3::ffi::PyTuple_Size($state); - if size != 3 && size != 4 { - return Err(pyo3::PyErr::new::( - "tuple size is invalid", - )); - } - - let maxsize = { - let obj = pyo3::ffi::PyTuple_GetItem($state, 0); - pyo3::ffi::PyLong_AsSize_t(obj) - }; - - if let Some(e) = pyo3::PyErr::take($py) { - return Err(e); - } - - let iterable = { - let obj = pyo3::ffi::PyTuple_GetItem($state, 1); - - if pyo3::ffi::PyDict_CheckExact(obj) != 1 { - return Err(pyo3::PyErr::new::( - "the iterable object is not an dict or list", - )); - } - - // Tuple returns borrowed reference - pyo3::Py::::from_borrowed_ptr($py, obj) - }; - - let capacity = { - let obj = pyo3::ffi::PyTuple_GetItem($state, 2); - pyo3::ffi::PyLong_AsSize_t(obj) - }; - - if let Some(e) = pyo3::PyErr::take($py) { - return Err(e); - } - - let maxmemory = if size == 4 { - let obj = pyo3::ffi::PyTuple_GetItem($state, 3); - let result = pyo3::ffi::PyLong_AsSize_t(obj); - - if let Some(e) = pyo3::PyErr::take($py) { - return Err(e); - } - - result - } else { - 0 - }; - - (maxsize, iterable, capacity, maxmemory) - }}; -} - -#[inline] -#[cfg(not(PyPy))] -pub fn pyobject_size(py: pyo3::Python<'_>, obj: &pyo3::Py) -> pyo3::PyResult { - static SIZEOF_METHOD_NAME: &'static std::ffi::CStr = c"__sizeof__"; - - // PyPy does not support __sizeof__ or sys.getsizeof - let sizeof_method = obj.bind(py).getattr(SIZEOF_METHOD_NAME)?; - - unsafe { - if pyo3::ffi::PyType_Check(obj.as_ptr()) == 1 { - sizeof_method.call1((obj,))?.extract::() - } else { - sizeof_method.call0()?.extract::() - } - } -} - -#[inline] -#[cfg(PyPy)] -pub fn pyobject_size(py: pyo3::Python<'_>, obj: &pyo3::Py) -> pyo3::PyResult { - static SIZEOF_METHOD_NAME: &'static std::ffi::CStr = c"__sizeof__"; - - // PyPy does not support __sizeof__ or sys.getsizeof - let 
sizeof_method = obj.bind(py).getattr_opt(SIZEOF_METHOD_NAME)?; - - match sizeof_method { - Some(sizeof_method) => unsafe { - if pyo3::ffi::PyType_Check(obj.as_ptr()) == 1 { - sizeof_method.call1((obj,))?.extract::() - } else { - sizeof_method.call0()?.extract::() - } - }, - None => Ok(1), - } -} - -#[inline] -pub fn entry_size( - py: pyo3::Python<'_>, - key: &PreHashObject, - value: &pyo3::Py, -) -> pyo3::PyResult { - let key_size = pyobject_size(py, &key.obj)?; - let value_size = pyobject_size(py, value)?; - - Ok(key_size.saturating_add(value_size)) -} - -#[inline] -pub fn pyobject_equal( - py: pyo3::Python<'_>, - arg1: *mut pyo3::ffi::PyObject, - arg2: *mut pyo3::ffi::PyObject, -) -> pyo3::PyResult { - unsafe { - if std::ptr::eq(arg1, arg2) { - return Ok(true); - } - - let boolean = pyo3::ffi::PyObject_RichCompareBool(arg1, arg2, pyo3::ffi::Py_EQ); - - if boolean < 0 { - Err(pyo3::PyErr::take(py).unwrap_unchecked()) - } else { - Ok(boolean == 1) - } - } -} - -/// Converts an isize value to a u64 value, mapping negative values to the upper half of the u64 range. -/// -/// This function ensures a bijective mapping between isize and u64, preserving the order of values -/// by offsetting negative values to the upper range of u64. -#[inline(always)] -fn convert_isize_to_u64(v: &isize) -> u64 { - const OFFSET: u64 = 0x8000000000000000; // 1 << 63 - - if *v >= 0 { - *v as u64 - } else { - (-(*v + 1)) as u64 + OFFSET - } -} - -/// Precomputed Hash PyObject -/// -/// A precomputed hash is a cryptographic hash value that's calculated in advance -/// and stored for later use, rather than being computed on demand when needed. -pub struct PreHashObject { - pub obj: pyo3::Py, - pub hash: u64, -} - -/// A view into a single entry in a table, which may either be absent or occupied. -/// -/// This is common in policies and will be used by `entry(...)` methods of them. 
-pub enum Entry { - Occupied(O), - Absent(V), -} - -/// Observe caches' changes -#[derive(Debug)] -pub struct Observed(u16); - -/// Checks the [`Observed`] on iterators -#[derive(Debug)] -pub struct ObservedIterator { - pub ptr: core::ptr::NonNull, - pub statepoint: u16, -} - -pub struct NoLifetimeSliceIter { - pub pointer: std::ptr::NonNull, - pub index: usize, - pub len: usize, -} - -/// A pair representing a key-value entry with a time-to-live (TTL) expiration. -pub struct TimeToLivePair { - pub key: PreHashObject, - pub value: pyo3::Py, - pub expire_at: Option, - pub size: usize, -} - -/// Represents the possible situations when a key is absent in VTTL or TTL policy's data structure. -/// -/// This enum helps track different scenarios during key insertion. -pub enum AbsentSituation { - /// A valid insertion slot is available - Slot(hashbrown::raw::InsertSlot), - - /// An expired entry's bucket is found - Expired(hashbrown::raw::Bucket), - - /// No suitable slot or expired entry is found - None, -} - -impl PreHashObject { - /// Creates a new [`PreHashObject`] - #[inline] - pub fn new(obj: pyo3::Py, hash: u64) -> Self { - Self { obj, hash } - } - - /// Calculates the hash of `object` and creates a new [`PreHashObject`] - #[inline] - pub fn from_pyobject( - py: pyo3::Python<'_>, - object: pyo3::Py, - ) -> pyo3::PyResult { - unsafe { - let py_hash = pyo3::ffi::PyObject_Hash(object.as_ptr()); - - if py_hash == -1 { - // SAFETY: - // PyObject_Hash never returns -1 on success. 
- return Err(pyo3::PyErr::take(py).unwrap_unchecked()); - } - - Ok(Self::new(object, convert_isize_to_u64(&py_hash))) - } - } - - /// Check equality of two objects by using [`pyo3::ffi::PyObject_RichCompareBool`] - #[inline] - pub fn equal(&self, py: pyo3::Python<'_>, other: &Self) -> pyo3::PyResult { - pyobject_equal(py, self.obj.as_ptr(), other.obj.as_ptr()) - } -} - -impl std::fmt::Debug for PreHashObject { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "PreHashObject({})", self.hash) - } -} - -/// A trait for adding `try_find` and `try_find_entry` methods to [`hashbrown::HashTable`] -pub trait TryFindMethods { - /// Searches for an element in the table. - fn try_find( - &self, - hash: u64, - compare: impl FnMut(&T) -> Result, - ) -> Result>, E>; - - fn try_find_or_find_insert_slot( - &mut self, - hash: u64, - compare: impl FnMut(&T) -> Result, - hasher: impl Fn(&T) -> u64, - ) -> Result, hashbrown::raw::InsertSlot>, E>; -} - -impl TryFindMethods for hashbrown::raw::RawTable { - #[inline] - fn try_find( - &self, - hash: u64, - mut compare: impl FnMut(&T) -> Result, - ) -> Result>, E> { - let mut error = None; - - let found = self.find(hash, |item| { - match compare(item) { - Ok(boolean) => boolean, - Err(e) => { - error = Some(e); - true // To break checking - } - } - }); - - if let Some(error) = error { - Err(error) - } else { - Ok(found) - } - } - - #[inline] - fn try_find_or_find_insert_slot( - &mut self, - hash: u64, - mut compare: impl FnMut(&T) -> Result, - hasher: impl Fn(&T) -> u64, - ) -> Result, hashbrown::raw::InsertSlot>, E> { - let mut error = None; - - let found = self.find_or_find_insert_slot( - hash, - |item| { - match compare(item) { - Ok(boolean) => boolean, - Err(e) => { - error = Some(e); - true // To break checking - } - } - }, - hasher, - ); - - if let Some(error) = error { - Err(error) - } else { - Ok(found) - } - } -} - -impl Observed { - #[cold] - pub fn new() -> Self { - Self(0) - } - - 
#[inline(always)] - pub fn change(&mut self) { - if self.0 == u16::MAX { - self.0 = 0; - } else { - self.0 = unsafe { self.0.unchecked_add(1) }; - } - } - - pub fn get(&self) -> u16 { - self.0 - } -} - -#[inline] -unsafe fn _get_state(py: pyo3::Python<'_>, ptr: *mut pyo3::ffi::PyObject) -> pyo3::PyResult { - unsafe fn inner( - py: pyo3::Python<'_>, - ptr: *mut pyo3::ffi::PyObject, - ) -> pyo3::PyResult<*mut pyo3::ffi::PyObject> { - cfg_if::cfg_if! { - if #[cfg(all(Py_3_9, not(any(Py_LIMITED_API, PyPy, GraalPy))))] { - use pyo3::IntoPyObject; - - let m_name: pyo3::Bound<'_, pyo3::types::PyString> = "_state".into_pyobject(py)?; - Ok(pyo3::ffi::PyObject_CallMethodNoArgs(ptr, m_name.as_ptr())) - } else { - let state_fn = - pyo3::ffi::PyObject_GetAttrString(ptr, pyo3::ffi::c_str!("_state").as_ptr()); - - if state_fn.is_null() { - return Err(pyo3::PyErr::take(py).unwrap_unchecked()); - } - - let empty_args = pyo3::ffi::PyTuple_New(0); - let result = pyo3::ffi::PyObject_Call(state_fn, empty_args, std::ptr::null_mut()); - pyo3::ffi::Py_XDECREF(empty_args); - pyo3::ffi::Py_XDECREF(state_fn); - - Ok(result) - } - } - } - - let result = inner(py, ptr)?; - - if result.is_null() { - return Err(pyo3::PyErr::take(py).unwrap_unchecked()); - } - - let c = pyo3::ffi::PyLong_AsSize_t(result); - pyo3::ffi::Py_XDECREF(result); - - Ok(c as u16) -} - -impl ObservedIterator { - pub fn new(ptr: *mut pyo3::ffi::PyObject, state: u16) -> Self { - unsafe { - pyo3::ffi::Py_XINCREF(ptr); - } - - Self { - ptr: unsafe { core::ptr::NonNull::new(ptr).unwrap_unchecked() }, - statepoint: state, - } - } - - #[inline] - pub fn proceed(&self, py: pyo3::Python<'_>) -> pyo3::PyResult<()> { - let state = unsafe { _get_state(py, self.ptr.as_ptr())? 
}; - - if state != self.statepoint { - return Err(pyo3::PyErr::new::( - "cache changed during iteration", - )); - } - - Ok(()) - } -} - -impl Drop for ObservedIterator { - fn drop(&mut self) { - unsafe { - pyo3::ffi::Py_XDECREF(self.ptr.as_ptr()); - } - } -} - -unsafe impl Send for ObservedIterator {} -unsafe impl Sync for ObservedIterator {} - -impl NoLifetimeSliceIter { - pub fn new(slice: &[T]) -> Self { - let pointer: std::ptr::NonNull = std::ptr::NonNull::from(slice).cast(); - - Self { - pointer, - index: 0, - len: slice.len(), - } - } -} - -impl Iterator for NoLifetimeSliceIter { - type Item = std::ptr::NonNull; - - #[inline] - fn next(&mut self) -> Option { - if self.index >= self.len { - None - } else { - let value = unsafe { self.pointer.add(self.index) }; - self.index += 1; - Some(value) - } - } -} - -impl TimeToLivePair { - #[inline] - pub fn new( - key: PreHashObject, - value: pyo3::Py, - expire_at: Option, - size: usize, - ) -> Self { - Self { - key, - value, - expire_at, - size, - } - } - - pub fn duration(&self) -> Option { - self.expire_at.map(|x| { - x.duration_since(std::time::SystemTime::now()) - .unwrap_or_default() - }) - } - - #[inline] - pub fn is_expired(&self, now: std::time::SystemTime) -> bool { - match self.expire_at { - Some(x) => x < now, - None => false, - } - } -} - -pub type Mutex = parking_lot::Mutex; diff --git a/src/hashbrown/alloc.rs b/src/hashbrown/alloc.rs new file mode 100644 index 0000000..89feb2b --- /dev/null +++ b/src/hashbrown/alloc.rs @@ -0,0 +1,13 @@ +use core::ptr::NonNull; + +#[cfg(test)] +pub(crate) use std::alloc::AllocError; +use std::alloc::Layout; +pub(crate) use std::alloc::{Allocator, Global}; + +pub(crate) fn do_alloc(alloc: &A, layout: Layout) -> Result, ()> { + match alloc.allocate(layout) { + Ok(ptr) => Ok(ptr), + Err(_) => Err(()), + } +} diff --git a/src/hashbrown/control/bitmask.rs b/src/hashbrown/control/bitmask.rs new file mode 100644 index 0000000..7228312 --- /dev/null +++ 
b/src/hashbrown/control/bitmask.rs @@ -0,0 +1,107 @@ +use super::group::{BITMASK_ITER_MASK, BITMASK_STRIDE, BitMaskWord, NonZeroBitMaskWord}; + +/// A bit mask which contains the result of a `Match` operation on a `Group` and +/// allows iterating through them. +/// +/// The bit mask is arranged so that low-order bits represent lower memory +/// addresses for group match results. +/// +/// For implementation reasons, the bits in the set may be sparsely packed with +/// groups of 8 bits representing one element. If any of these bits are non-zero +/// then this element is considered to true in the mask. If this is the +/// case, `BITMASK_STRIDE` will be 8 to indicate a divide-by-8 should be +/// performed on counts/indices to normalize this difference. `BITMASK_MASK` is +/// similarly a mask of all the actually-used bits. +/// +/// To iterate over a bit mask, it must be converted to a form where only 1 bit +/// is set per element. This is done by applying `BITMASK_ITER_MASK` on the +/// mask bits. +#[derive(Copy, Clone)] +pub(crate) struct BitMask(pub(crate) BitMaskWord); + +#[expect(clippy::use_self)] +impl BitMask { + /// Returns a new `BitMask` with the lowest bit removed. + #[inline] + #[must_use] + fn remove_lowest_bit(self) -> Self { + BitMask(self.0 & (self.0 - 1)) + } + + /// Returns whether the `BitMask` has at least one set bit. + #[inline] + pub(crate) fn any_bit_set(self) -> bool { + self.0 != 0 + } + + /// Returns the first set bit in the `BitMask`, if there is one. + #[inline] + pub(crate) fn lowest_set_bit(self) -> Option { + if let Some(nonzero) = NonZeroBitMaskWord::new(self.0) { + Some(Self::nonzero_trailing_zeros(nonzero)) + } else { + None + } + } + + /// Returns the number of trailing zeroes in the `BitMask`. + #[inline] + pub(crate) fn trailing_zeros(self) -> usize { + // ARM doesn't have a trailing_zeroes instruction, and instead uses + // reverse_bits (RBIT) + leading_zeroes (CLZ). 
However older ARM + // versions (pre-ARMv7) don't have RBIT and need to emulate it + // instead. Since we only have 1 bit set in each byte on ARM, we can + // use swap_bytes (REV) + leading_zeroes instead. + if cfg!(target_arch = "arm") && BITMASK_STRIDE % 8 == 0 { + self.0.swap_bytes().leading_zeros() as usize / BITMASK_STRIDE + } else { + self.0.trailing_zeros() as usize / BITMASK_STRIDE + } + } + + /// Same as above but takes a `NonZeroBitMaskWord`. + #[inline] + fn nonzero_trailing_zeros(nonzero: NonZeroBitMaskWord) -> usize { + if cfg!(target_arch = "arm") && BITMASK_STRIDE % 8 == 0 { + // SAFETY: A byte-swapped non-zero value is still non-zero. + let swapped = unsafe { NonZeroBitMaskWord::new_unchecked(nonzero.get().swap_bytes()) }; + swapped.leading_zeros() as usize / BITMASK_STRIDE + } else { + nonzero.trailing_zeros() as usize / BITMASK_STRIDE + } + } + + /// Returns the number of leading zeroes in the `BitMask`. + #[inline] + pub(crate) fn leading_zeros(self) -> usize { + self.0.leading_zeros() as usize / BITMASK_STRIDE + } +} + +impl IntoIterator for BitMask { + type Item = usize; + type IntoIter = BitMaskIter; + + #[inline] + fn into_iter(self) -> BitMaskIter { + // A BitMask only requires each element (group of bits) to be non-zero. + // However for iteration we need each element to only contain 1 bit. + BitMaskIter(BitMask(self.0 & BITMASK_ITER_MASK)) + } +} + +/// Iterator over the contents of a `BitMask`, returning the indices of set +/// bits. 
+#[derive(Clone)] +pub(crate) struct BitMaskIter(pub(crate) BitMask); + +impl Iterator for BitMaskIter { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option { + let bit = self.0.lowest_set_bit()?; + self.0 = self.0.remove_lowest_bit(); + Some(bit) + } +} diff --git a/src/hashbrown/control/group/generic.rs b/src/hashbrown/control/group/generic.rs new file mode 100644 index 0000000..09d5cd8 --- /dev/null +++ b/src/hashbrown/control/group/generic.rs @@ -0,0 +1,152 @@ +use super::super::{BitMask, Tag}; +use core::{mem, ptr}; + +// Use the native word size as the group size. Using a 64-bit group size on +// a 32-bit architecture will just end up being more expensive because +// shifts and multiplies will need to be emulated. + +cfg_if! { + if #[cfg(any( + target_pointer_width = "64", + target_arch = "aarch64", + target_arch = "x86_64", + target_arch = "wasm32", + ))] { + type GroupWord = u64; + type NonZeroGroupWord = core::num::NonZeroU64; + } else { + type GroupWord = u32; + type NonZeroGroupWord = core::num::NonZeroU32; + } +} + +pub(crate) type BitMaskWord = GroupWord; +pub(crate) type NonZeroBitMaskWord = NonZeroGroupWord; +pub(crate) const BITMASK_STRIDE: usize = 8; +// We only care about the highest bit of each tag for the mask. +const BITMASK_MASK: BitMaskWord = u64::from_ne_bytes([Tag::DELETED.0; 8]) as GroupWord; +pub(crate) const BITMASK_ITER_MASK: BitMaskWord = !0; + +/// Helper function to replicate a tag across a `GroupWord`. +#[inline] +fn repeat(tag: Tag) -> GroupWord { + GroupWord::from_ne_bytes([tag.0; Group::WIDTH]) +} + +/// Abstraction over a group of control tags which can be scanned in +/// parallel. +/// +/// This implementation uses a word-sized integer. +#[derive(Copy, Clone)] +pub(crate) struct Group(GroupWord); + +// We perform all operations in the native endianness, and convert to +// little-endian just before creating a BitMask. 
The can potentially +// enable the compiler to eliminate unnecessary byte swaps if we are +// only checking whether a BitMask is empty. +#[expect(clippy::use_self)] +impl Group { + /// Number of bytes in the group. + pub(crate) const WIDTH: usize = mem::size_of::(); + + /// Returns a full group of empty tags, suitable for use as the initial + /// value for an empty hash table. + /// + /// This is guaranteed to be aligned to the group size. + #[inline] + pub(crate) const fn static_empty() -> &'static [Tag; Group::WIDTH] { + #[repr(C)] + struct AlignedTags { + _align: [Group; 0], + tags: [Tag; Group::WIDTH], + } + const ALIGNED_TAGS: AlignedTags = AlignedTags { + _align: [], + tags: [Tag::EMPTY; Group::WIDTH], + }; + &ALIGNED_TAGS.tags + } + + /// Loads a group of tags starting at the given address. + #[inline] + pub(crate) unsafe fn load(ptr: *const Tag) -> Self { + unsafe { Group(ptr::read_unaligned(ptr.cast())) } + } + + /// Loads a group of tags starting at the given address, which must be + /// aligned to `mem::align_of::()`. + #[inline] + pub(crate) unsafe fn load_aligned(ptr: *const Tag) -> Self { + debug_assert_eq!(ptr.align_offset(mem::align_of::()), 0); + unsafe { Group(ptr::read(ptr.cast())) } + } + + /// Stores the group of tags to the given address, which must be + /// aligned to `mem::align_of::()`. + #[inline] + pub(crate) unsafe fn store_aligned(self, ptr: *mut Tag) { + debug_assert_eq!(ptr.align_offset(mem::align_of::()), 0); + unsafe { + ptr::write(ptr.cast(), self.0); + } + } + + /// Returns a `BitMask` indicating all tags in the group which *may* + /// have the given value. + /// + /// This function may return a false positive in certain cases where + /// the tag in the group differs from the searched value only in its + /// lowest bit. This is fine because: + /// - This never happens for `EMPTY` and `DELETED`, only full entries. + /// - The check for key equality will catch these. + /// - This only happens if there is at least 1 true match. 
+ /// - The chance of this happening is very low (< 1% chance per tag). + #[inline] + pub(crate) fn match_tag(self, tag: Tag) -> BitMask { + // This algorithm is derived from + // https://graphics.stanford.edu/~seander/bithacks.html##ValueInWord + let cmp = self.0 ^ repeat(tag); + BitMask((cmp.wrapping_sub(repeat(Tag(0x01))) & !cmp & repeat(Tag::DELETED)).to_le()) + } + + /// Returns a `BitMask` indicating all tags in the group which are + /// `EMPTY`. + #[inline] + pub(crate) fn match_empty(self) -> BitMask { + // If the high bit is set, then the tag must be either: + // 1111_1111 (EMPTY) or 1000_0000 (DELETED). + // So we can just check if the top two bits are 1 by ANDing them. + BitMask((self.0 & (self.0 << 1) & repeat(Tag::DELETED)).to_le()) + } + + /// Returns a `BitMask` indicating all tags in the group which are + /// `EMPTY` or `DELETED`. + #[inline] + pub(crate) fn match_empty_or_deleted(self) -> BitMask { + // A tag is EMPTY or DELETED iff the high bit is set + BitMask((self.0 & repeat(Tag::DELETED)).to_le()) + } + + /// Returns a `BitMask` indicating all tags in the group which are full. 
+ #[inline] + pub(crate) fn match_full(self) -> BitMask { + BitMask(self.match_empty_or_deleted().0 ^ BITMASK_MASK) + } + + /// Performs the following transformation on all tags in the group: + /// - `EMPTY => EMPTY` + /// - `DELETED => EMPTY` + /// - `FULL => DELETED` + #[inline] + pub(crate) fn convert_special_to_empty_and_full_to_deleted(self) -> Self { + // Map high_bit = 1 (EMPTY or DELETED) to 1111_1111 + // and high_bit = 0 (FULL) to 1000_0000 + // + // Here's this logic expanded to concrete values: + // let full = 1000_0000 (true) or 0000_0000 (false) + // !1000_0000 + 1 = 0111_1111 + 1 = 1000_0000 (no carry) + // !0000_0000 + 0 = 1111_1111 + 0 = 1111_1111 (no carry) + let full = !self.0 & repeat(Tag::DELETED); + Group(!full + (full >> 7)) + } +} diff --git a/src/hashbrown/control/group/lsx.rs b/src/hashbrown/control/group/lsx.rs new file mode 100644 index 0000000..7da098a --- /dev/null +++ b/src/hashbrown/control/group/lsx.rs @@ -0,0 +1,124 @@ +use super::super::{BitMask, Tag}; +use core::mem; +use core::num::NonZeroU16; + +use core::arch::loongarch64::*; + +pub(crate) type BitMaskWord = u16; +pub(crate) type NonZeroBitMaskWord = NonZeroU16; +pub(crate) const BITMASK_STRIDE: usize = 1; +pub(crate) const BITMASK_ITER_MASK: BitMaskWord = !0; + +/// Abstraction over a group of control tags which can be scanned in +/// parallel. +/// +/// This implementation uses a 128-bit LSX value. +#[derive(Copy, Clone)] +pub(crate) struct Group(m128i); + +// FIXME: https://github.com/rust-lang/rust-clippy/issues/3859 +#[expect(clippy::use_self)] +impl Group { + /// Number of bytes in the group. + pub(crate) const WIDTH: usize = mem::size_of::(); + + /// Returns a full group of empty tags, suitable for use as the initial + /// value for an empty hash table. + /// + /// This is guaranteed to be aligned to the group size. 
+ #[inline] + pub(crate) const fn static_empty() -> &'static [Tag; Group::WIDTH] { + #[repr(C)] + struct AlignedTags { + _align: [Group; 0], + tags: [Tag; Group::WIDTH], + } + const ALIGNED_TAGS: AlignedTags = AlignedTags { + _align: [], + tags: [Tag::EMPTY; Group::WIDTH], + }; + &ALIGNED_TAGS.tags + } + + /// Loads a group of tags starting at the given address. + #[inline] + pub(crate) unsafe fn load(ptr: *const Tag) -> Self { + unsafe { Group(lsx_vld::<0>(ptr.cast())) } + } + + /// Loads a group of tags starting at the given address, which must be + /// aligned to `mem::align_of::()`. + #[inline] + pub(crate) unsafe fn load_aligned(ptr: *const Tag) -> Self { + debug_assert_eq!(ptr.align_offset(mem::align_of::()), 0); + unsafe { Group(lsx_vld::<0>(ptr.cast())) } + } + + /// Stores the group of tags to the given address, which must be + /// aligned to `mem::align_of::()`. + #[inline] + pub(crate) unsafe fn store_aligned(self, ptr: *mut Tag) { + debug_assert_eq!(ptr.align_offset(mem::align_of::()), 0); + unsafe { + lsx_vst::<0>(self.0, ptr.cast()); + } + } + + /// Returns a `BitMask` indicating all tags in the group which have + /// the given value. + #[inline] + pub(crate) fn match_tag(self, tag: Tag) -> BitMask { + unsafe { + let cmp = lsx_vseq_b(self.0, lsx_vreplgr2vr_b(tag.0 as i32)); + BitMask(lsx_vpickve2gr_hu::<0>(lsx_vmskltz_b(cmp)) as u16) + } + } + + /// Returns a `BitMask` indicating all tags in the group which are + /// `EMPTY`. + #[inline] + pub(crate) fn match_empty(self) -> BitMask { + unsafe { + let cmp = lsx_vseqi_b::<{ Tag::EMPTY.0 as i8 as i32 }>(self.0); + BitMask(lsx_vpickve2gr_hu::<0>(lsx_vmskltz_b(cmp)) as u16) + } + } + + /// Returns a `BitMask` indicating all tags in the group which are + /// `EMPTY` or `DELETED`. 
+ #[inline] + pub(crate) fn match_empty_or_deleted(self) -> BitMask { + unsafe { + // A tag is EMPTY or DELETED iff the high bit is set + BitMask(lsx_vpickve2gr_hu::<0>(lsx_vmskltz_b(self.0)) as u16) + } + } + + /// Returns a `BitMask` indicating all tags in the group which are full. + #[inline] + pub(crate) fn match_full(&self) -> BitMask { + unsafe { + // A tag is EMPTY or DELETED iff the high bit is set + BitMask(lsx_vpickve2gr_hu::<0>(lsx_vmskgez_b(self.0)) as u16) + } + } + + /// Performs the following transformation on all tags in the group: + /// - `EMPTY => EMPTY` + /// - `DELETED => EMPTY` + /// - `FULL => DELETED` + #[inline] + pub(crate) fn convert_special_to_empty_and_full_to_deleted(self) -> Self { + // Map high_bit = 1 (EMPTY or DELETED) to 1111_1111 + // and high_bit = 0 (FULL) to 1000_0000 + // + // Here's this logic expanded to concrete values: + // let special = 0 > tag = 1111_1111 (true) or 0000_0000 (false) + // 1111_1111 | 1000_0000 = 1111_1111 + // 0000_0000 | 1000_0000 = 1000_0000 + unsafe { + let special = lsx_vslti_b::<0>(self.0); + Group(lsx_vori_b::<{ Tag::DELETED.0 as u32 }>(special)) + } + } +} diff --git a/src/hashbrown/control/group/mod.rs b/src/hashbrown/control/group/mod.rs new file mode 100644 index 0000000..8975f94 --- /dev/null +++ b/src/hashbrown/control/group/mod.rs @@ -0,0 +1,47 @@ +// TESTING NOTE: +// +// Because this module uses `cfg(..)` to select an implementation, it will not +// be linted without being run on targets that actually load each of these +// modules. Be sure to edit `ci/tools.sh` to add in the necessary cfgs if you +// change these, so that your implementation gets properly linted. + +cfg_if::cfg_if! { + // Use the SSE2 implementation if possible: it allows us to scan 16 buckets + // at once instead of 8. We don't bother with AVX since it would require + // runtime dispatch and wouldn't gain us much anyways: the probability of + // finding a match drops off drastically after the first few buckets. 
+ // + // I attempted an implementation on ARM using NEON instructions, but it + // turns out that most NEON instructions have multi-cycle latency, which in + // the end outweighs any gains over the generic implementation. + if #[cfg(all( + target_feature = "sse2", + any(target_arch = "x86", target_arch = "x86_64"), + not(miri), + ))] { + mod sse2; + use sse2 as imp; + } else if #[cfg(all( + target_arch = "aarch64", + target_feature = "neon", + // NEON intrinsics are currently broken on big-endian targets. + // See https://github.com/rust-lang/stdarch/issues/1484. + target_endian = "little", + not(miri), + ))] { + mod neon; + use neon as imp; + } else if #[cfg(all( + target_arch = "loongarch64", + target_feature = "lsx", + not(miri), + ))] { + mod lsx; + use lsx as imp; + } else { + mod generic; + use generic as imp; + } +} +pub(crate) use self::imp::Group; +pub(super) use self::imp::{BitMaskWord, NonZeroBitMaskWord, BITMASK_ITER_MASK, BITMASK_STRIDE}; diff --git a/src/hashbrown/control/group/neon.rs b/src/hashbrown/control/group/neon.rs new file mode 100644 index 0000000..c64b891 --- /dev/null +++ b/src/hashbrown/control/group/neon.rs @@ -0,0 +1,119 @@ +use super::super::{BitMask, Tag}; +use core::arch::aarch64 as neon; +use core::mem; +use core::num::NonZeroU64; + +pub(crate) type BitMaskWord = u64; +pub(crate) type NonZeroBitMaskWord = NonZeroU64; +pub(crate) const BITMASK_STRIDE: usize = 8; +pub(crate) const BITMASK_ITER_MASK: BitMaskWord = 0x8080_8080_8080_8080; + +/// Abstraction over a group of control tags which can be scanned in +/// parallel. +/// +/// This implementation uses a 64-bit NEON value. +#[derive(Copy, Clone)] +pub(crate) struct Group(neon::uint8x8_t); + +#[expect(clippy::use_self)] +impl Group { + /// Number of bytes in the group. + pub(crate) const WIDTH: usize = mem::size_of::(); + + /// Returns a full group of empty tags, suitable for use as the initial + /// value for an empty hash table. 
+ /// + /// This is guaranteed to be aligned to the group size. + #[inline] + pub(crate) const fn static_empty() -> &'static [Tag; Group::WIDTH] { + #[repr(C)] + struct AlignedTags { + _align: [Group; 0], + tags: [Tag; Group::WIDTH], + } + const ALIGNED_TAGS: AlignedTags = AlignedTags { + _align: [], + tags: [Tag::EMPTY; Group::WIDTH], + }; + &ALIGNED_TAGS.tags + } + + /// Loads a group of tags starting at the given address. + #[inline] + pub(crate) unsafe fn load(ptr: *const Tag) -> Self { + unsafe { Group(neon::vld1_u8(ptr.cast())) } + } + + /// Loads a group of tags starting at the given address, which must be + /// aligned to `mem::align_of::()`. + #[inline] + pub(crate) unsafe fn load_aligned(ptr: *const Tag) -> Self { + debug_assert_eq!(ptr.align_offset(mem::align_of::()), 0); + unsafe { Group(neon::vld1_u8(ptr.cast())) } + } + + /// Stores the group of tags to the given address, which must be + /// aligned to `mem::align_of::()`. + #[inline] + pub(crate) unsafe fn store_aligned(self, ptr: *mut Tag) { + debug_assert_eq!(ptr.align_offset(mem::align_of::()), 0); + unsafe { + neon::vst1_u8(ptr.cast(), self.0); + } + } + + /// Returns a `BitMask` indicating all tags in the group which *may* + /// have the given value. + #[inline] + pub(crate) fn match_tag(self, tag: Tag) -> BitMask { + unsafe { + let cmp = neon::vceq_u8(self.0, neon::vdup_n_u8(tag.0)); + BitMask(neon::vget_lane_u64(neon::vreinterpret_u64_u8(cmp), 0)) + } + } + + /// Returns a `BitMask` indicating all tags in the group which are + /// `EMPTY`. + #[inline] + pub(crate) fn match_empty(self) -> BitMask { + self.match_tag(Tag::EMPTY) + } + + /// Returns a `BitMask` indicating all tags in the group which are + /// `EMPTY` or `DELETED`. 
+    #[inline]
+    pub(crate) fn match_empty_or_deleted(self) -> BitMask {
+        unsafe {
+            let cmp = neon::vcltz_s8(neon::vreinterpret_s8_u8(self.0));
+            BitMask(neon::vget_lane_u64(neon::vreinterpret_u64_u8(cmp), 0))
+        }
+    }
+
+    /// Returns a `BitMask` indicating all tags in the group which are full.
+    #[inline]
+    pub(crate) fn match_full(self) -> BitMask {
+        unsafe {
+            let cmp = neon::vcgez_s8(neon::vreinterpret_s8_u8(self.0));
+            BitMask(neon::vget_lane_u64(neon::vreinterpret_u64_u8(cmp), 0))
+        }
+    }
+
+    /// Performs the following transformation on all tags in the group:
+    /// - `EMPTY => EMPTY`
+    /// - `DELETED => EMPTY`
+    /// - `FULL => DELETED`
+    #[inline]
+    pub(crate) fn convert_special_to_empty_and_full_to_deleted(self) -> Self {
+        // Map high_bit = 1 (EMPTY or DELETED) to 1111_1111 (EMPTY)
+        // and high_bit = 0 (FULL) to 1000_0000 (DELETED)
+        //
+        // Here's this logic expanded to concrete values:
+        //   let special = 0 > tag = 1111_1111 (true) or 0000_0000 (false)
+        //   1111_1111 | 1000_0000 = 1111_1111
+        //   0000_0000 | 1000_0000 = 1000_0000
+        unsafe {
+            let special = neon::vcltz_s8(neon::vreinterpret_s8_u8(self.0));
+            Group(neon::vorr_u8(special, neon::vdup_n_u8(0x80)))
+        }
+    }
+}
diff --git a/src/hashbrown/control/group/sse2.rs b/src/hashbrown/control/group/sse2.rs
new file mode 100644
index 0000000..2b12c01
--- /dev/null
+++ b/src/hashbrown/control/group/sse2.rs
@@ -0,0 +1,143 @@
+use super::super::{BitMask, Tag};
+use core::mem;
+use core::num::NonZeroU16;
+
+#[cfg(target_arch = "x86")]
+use core::arch::x86;
+#[cfg(target_arch = "x86_64")]
+use core::arch::x86_64 as x86;
+
+pub(crate) type BitMaskWord = u16;
+pub(crate) type NonZeroBitMaskWord = NonZeroU16;
+pub(crate) const BITMASK_STRIDE: usize = 1;
+pub(crate) const BITMASK_ITER_MASK: BitMaskWord = !0;
+
+/// Abstraction over a group of control tags which can be scanned in
+/// parallel.
+///
+/// This implementation uses a 128-bit SSE value.
+#[derive(Copy, Clone)] +pub(crate) struct Group(x86::__m128i); + +// FIXME: https://github.com/rust-lang/rust-clippy/issues/3859 +#[expect(clippy::use_self)] +impl Group { + /// Number of bytes in the group. + pub(crate) const WIDTH: usize = mem::size_of::(); + + /// Returns a full group of empty tags, suitable for use as the initial + /// value for an empty hash table. + /// + /// This is guaranteed to be aligned to the group size. + #[inline] + pub(crate) const fn static_empty() -> &'static [Tag; Group::WIDTH] { + #[repr(C)] + struct AlignedTags { + _align: [Group; 0], + tags: [Tag; Group::WIDTH], + } + const ALIGNED_TAGS: AlignedTags = AlignedTags { + _align: [], + tags: [Tag::EMPTY; Group::WIDTH], + }; + &ALIGNED_TAGS.tags + } + + /// Loads a group of tags starting at the given address. + #[inline] + pub(crate) unsafe fn load(ptr: *const Tag) -> Self { + unsafe { Group(x86::_mm_loadu_si128(ptr.cast())) } + } + + /// Loads a group of tags starting at the given address, which must be + /// aligned to `mem::align_of::()`. + #[inline] + pub(crate) unsafe fn load_aligned(ptr: *const Tag) -> Self { + debug_assert_eq!(ptr.align_offset(mem::align_of::()), 0); + unsafe { Group(x86::_mm_load_si128(ptr.cast())) } + } + + /// Stores the group of tags to the given address, which must be + /// aligned to `mem::align_of::()`. + #[inline] + pub(crate) unsafe fn store_aligned(self, ptr: *mut Tag) { + debug_assert_eq!(ptr.align_offset(mem::align_of::()), 0); + unsafe { + x86::_mm_store_si128(ptr.cast(), self.0); + } + } + + /// Returns a `BitMask` indicating all tags in the group which have + /// the given value. 
+    #[inline]
+    pub(crate) fn match_tag(self, tag: Tag) -> BitMask {
+        #[expect(
+            clippy::cast_possible_wrap, // tag.0: u8 as i8
+            // movemask result: i32 as u16
+            //   note: _mm_movemask_epi8 returns a 16-bit mask in an i32, the
+            //   upper 16-bits of the i32 are zeroed:
+            clippy::cast_sign_loss,
+            clippy::cast_possible_truncation
+        )]
+        unsafe {
+            let cmp = x86::_mm_cmpeq_epi8(self.0, x86::_mm_set1_epi8(tag.0 as i8));
+            BitMask(x86::_mm_movemask_epi8(cmp) as u16)
+        }
+    }
+
+    /// Returns a `BitMask` indicating all tags in the group which are
+    /// `EMPTY`.
+    #[inline]
+    pub(crate) fn match_empty(self) -> BitMask {
+        self.match_tag(Tag::EMPTY)
+    }
+
+    /// Returns a `BitMask` indicating all tags in the group which are
+    /// `EMPTY` or `DELETED`.
+    #[inline]
+    pub(crate) fn match_empty_or_deleted(self) -> BitMask {
+        #[expect(
+            // movemask result: i32 as u16
+            //   note: _mm_movemask_epi8 returns a 16-bit mask in an i32, the
+            //   upper 16-bits of the i32 are zeroed:
+            clippy::cast_sign_loss,
+            clippy::cast_possible_truncation
+        )]
+        unsafe {
+            // A tag is EMPTY or DELETED iff the high bit is set
+            BitMask(x86::_mm_movemask_epi8(self.0) as u16)
+        }
+    }
+
+    /// Returns a `BitMask` indicating all tags in the group which are full.
+    #[inline]
+    pub(crate) fn match_full(&self) -> BitMask {
+        BitMask(!self.match_empty_or_deleted().0)
+    }
+
+    /// Performs the following transformation on all tags in the group:
+    /// - `EMPTY => EMPTY`
+    /// - `DELETED => EMPTY`
+    /// - `FULL => DELETED`
+    #[inline]
+    pub(crate) fn convert_special_to_empty_and_full_to_deleted(self) -> Self {
+        // Map high_bit = 1 (EMPTY or DELETED) to 1111_1111 (EMPTY)
+        // and high_bit = 0 (FULL) to 1000_0000 (DELETED)
+        //
+        // Here's this logic expanded to concrete values:
+        //   let special = 0 > tag = 1111_1111 (true) or 0000_0000 (false)
+        //   1111_1111 | 1000_0000 = 1111_1111
+        //   0000_0000 | 1000_0000 = 1000_0000
+        #[expect(
+            clippy::cast_possible_wrap, // tag: Tag::DELETED.0 as i8
+        )]
+        unsafe {
+            let zero = x86::_mm_setzero_si128();
+            let special = x86::_mm_cmpgt_epi8(zero, self.0);
+            Group(x86::_mm_or_si128(
+                special,
+                x86::_mm_set1_epi8(Tag::DELETED.0 as i8),
+            ))
+        }
+    }
+}
diff --git a/src/hashbrown/control/mod.rs b/src/hashbrown/control/mod.rs
new file mode 100644
index 0000000..62ef8bf
--- /dev/null
+++ b/src/hashbrown/control/mod.rs
@@ -0,0 +1,10 @@
+mod bitmask;
+mod group;
+mod tag;
+
+use self::bitmask::BitMask;
+pub(crate) use self::{
+    bitmask::BitMaskIter,
+    group::Group,
+    tag::{Tag, TagSliceExt},
+};
diff --git a/src/hashbrown/control/tag.rs b/src/hashbrown/control/tag.rs
new file mode 100644
index 0000000..486bbba
--- /dev/null
+++ b/src/hashbrown/control/tag.rs
@@ -0,0 +1,82 @@
+use core::{fmt, mem};
+
+/// Single tag in a control group.
+#[derive(Copy, Clone, PartialEq, Eq)]
+#[repr(transparent)]
+pub(crate) struct Tag(pub(super) u8);
+impl Tag {
+    /// Control tag value for an empty bucket.
+    pub(crate) const EMPTY: Tag = Tag(0b1111_1111);
+
+    /// Control tag value for a deleted bucket.
+    pub(crate) const DELETED: Tag = Tag(0b1000_0000);
+
+    /// Checks whether a control tag represents a full bucket (top bit is clear).
+    #[inline]
+    pub(crate) const fn is_full(self) -> bool {
+        self.0 & 0x80 == 0
+    }
+
+    /// Checks whether a control tag represents a special value (top bit is set).
+    #[inline]
+    pub(crate) const fn is_special(self) -> bool {
+        self.0 & 0x80 != 0
+    }
+
+    /// Checks whether a special control value is EMPTY (just check 1 bit).
+    #[inline]
+    pub(crate) const fn special_is_empty(self) -> bool {
+        debug_assert!(self.is_special());
+        self.0 & 0x01 != 0
+    }
+
+    /// Creates a control tag representing a full bucket with the given hash.
+    #[inline]
+    pub(crate) const fn full(hash: u64) -> Tag {
+        // Constant for function that grabs the top 7 bits of the hash.
+        const MIN_HASH_LEN: usize = if mem::size_of::<usize>() < mem::size_of::<u64>() {
+            mem::size_of::<usize>()
+        } else {
+            mem::size_of::<u64>()
+        };
+
+        // Grab the top 7 bits of the hash. While the hash is normally a full 64-bit
+        // value, some hash functions (such as FxHash) produce a usize result
+        // instead, which means that the top 32 bits are 0 on 32-bit platforms.
+        // So we use MIN_HASH_LEN constant to handle this.
+        let top7 = hash >> (MIN_HASH_LEN * 8 - 7);
+        Tag((top7 & 0x7f) as u8) // truncation
+    }
+}
+impl fmt::Debug for Tag {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        if self.is_special() {
+            if self.special_is_empty() {
+                f.pad("EMPTY")
+            } else {
+                f.pad("DELETED")
+            }
+        } else {
+            f.debug_tuple("full").field(&(self.0 & 0x7F)).finish()
+        }
+    }
+}
+
+/// Extension trait for slices of tags.
+pub(crate) trait TagSliceExt {
+    /// Fills the control with the given tag.
+    fn fill_tag(&mut self, tag: Tag);
+
+    /// Clears out the control.
+    #[inline]
+    fn fill_empty(&mut self) {
+        self.fill_tag(Tag::EMPTY);
+    }
+}
+impl TagSliceExt for [mem::MaybeUninit<Tag>] {
+    #[inline]
+    fn fill_tag(&mut self, tag: Tag) {
+        // SAFETY: We have access to the entire slice, so, we can write to the entire slice.
+        unsafe { self.as_mut_ptr().write_bytes(tag.0, self.len()) }
+    }
+}
diff --git a/src/hashbrown/mod.rs b/src/hashbrown/mod.rs
new file mode 100644
index 0000000..f45d3fb
--- /dev/null
+++ b/src/hashbrown/mod.rs
@@ -0,0 +1,37 @@
+#![allow(dead_code)]
+
+pub mod alloc;
+pub mod control;
+pub mod raw;
+pub mod scopeguard;
+pub mod util;
+
+/// The error type for `try_reserve` methods.
+#[derive(Clone, PartialEq, Eq, Debug)]
+pub enum TryReserveError {
+    /// Error due to the computed capacity exceeding the collection's maximum
+    /// (usually `isize::MAX` bytes).
+    CapacityOverflow,
+
+    /// The memory allocator returned an error
+    AllocError {
+        /// The layout of the allocation request that failed.
+        layout: std::alloc::Layout,
+    },
+}
+
+// matches stdalloc::collections::TryReserveError
+impl core::fmt::Display for TryReserveError {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        f.write_str("memory allocation failed")?;
+        let reason = match self {
+            TryReserveError::CapacityOverflow => {
+                " because the computed capacity exceeded the collection's maximum"
+            }
+            TryReserveError::AllocError { .. } => " because the memory allocator returned an error",
+        };
+        f.write_str(reason)
+    }
+}
+
+impl core::error::Error for TryReserveError {}
diff --git a/src/hashbrown/raw.rs b/src/hashbrown/raw.rs
new file mode 100644
index 0000000..64752d5
--- /dev/null
+++ b/src/hashbrown/raw.rs
@@ -0,0 +1,4558 @@
+use super::control::{BitMaskIter, Group, Tag, TagSliceExt};
+use super::scopeguard::{guard, ScopeGuard};
+use super::util::{invalid_mut, likely, unlikely};
+use super::TryReserveError;
+use core::array;
+use core::iter::FusedIterator;
+use core::marker::PhantomData;
+use core::mem;
+use core::ptr;
+use core::ptr::NonNull;
+use core::slice;
+use std::alloc::{handle_alloc_error, Layout};
+
+#[cfg(test)]
+use super::alloc::AllocError;
+use super::alloc::{do_alloc, Allocator, Global};
+
+#[inline]
+unsafe fn offset_from<T>(to: *const T, from: *const T) -> usize {
+    unsafe { to.offset_from(from) as usize }
+}
+
+/// Whether memory allocation errors should return an error or abort.
+#[derive(Copy, Clone)]
+enum Fallibility {
+    Fallible,
+    Infallible,
+}
+
+impl Fallibility {
+    /// Error to return on capacity overflow.
+    #[cfg_attr(feature = "inline-more", inline)]
+    fn capacity_overflow(self) -> TryReserveError {
+        match self {
+            Fallibility::Fallible => TryReserveError::CapacityOverflow,
+            Fallibility::Infallible => panic!("Hash table capacity overflow"),
+        }
+    }
+
+    /// Error to return on allocation error.
+    #[cfg_attr(feature = "inline-more", inline)]
+    fn alloc_err(self, layout: Layout) -> TryReserveError {
+        match self {
+            Fallibility::Fallible => TryReserveError::AllocError { layout },
+            Fallibility::Infallible => handle_alloc_error(layout),
+        }
+    }
+}
+
+trait SizedTypeProperties: Sized {
+    const IS_ZERO_SIZED: bool = mem::size_of::<Self>() == 0;
+    const NEEDS_DROP: bool = mem::needs_drop::<Self>();
+}
+
+impl<T> SizedTypeProperties for T {}
+
+/// Primary hash function, used to select the initial bucket to probe from.
+#[inline] +#[expect(clippy::cast_possible_truncation)] +fn h1(hash: u64) -> usize { + // On 32-bit platforms we simply ignore the higher hash bits. + hash as usize +} + +/// Probe sequence based on triangular numbers, which is guaranteed (since our +/// table size is a power of two) to visit every group of elements exactly once. +/// +/// A triangular probe has us jump by 1 more group every time. So first we +/// jump by 1 group (meaning we just continue our linear scan), then 2 groups +/// (skipping over 1 group), then 3 groups (skipping over 2 groups), and so on. +/// +/// Proof that the probe will visit every group in the table: +/// +#[derive(Clone)] +struct ProbeSeq { + pos: usize, + stride: usize, +} + +impl ProbeSeq { + #[inline] + fn move_next(&mut self, bucket_mask: usize) { + // We should have found an empty bucket by now and ended the probe. + debug_assert!( + self.stride <= bucket_mask, + "Went past end of probe sequence" + ); + + self.stride += Group::WIDTH; + self.pos += self.stride; + self.pos &= bucket_mask; + } +} + +/// Returns the number of buckets needed to hold the given number of items, +/// taking the maximum load factor into account. +/// +/// Returns `None` if an overflow occurs. +/// +/// This ensures that `buckets * table_layout.size >= table_layout.ctrl_align`. +// Workaround for emscripten bug emscripten-core/emscripten-fastcomp#258 +#[cfg_attr(target_os = "emscripten", inline(never))] +#[cfg_attr(not(target_os = "emscripten"), inline)] +fn capacity_to_buckets(cap: usize, table_layout: TableLayout) -> Option { + debug_assert_ne!(cap, 0); + + // For small tables we require at least 1 empty bucket so that lookups are + // guaranteed to terminate if an element doesn't exist in the table. + if cap < 15 { + // Consider a small TableLayout like { size: 1, ctrl_align: 16 } on a + // platform with Group::WIDTH of 16 (like x86_64 with SSE2). 
For small + // bucket sizes, this ends up wasting quite a few bytes just to pad to + // the relatively larger ctrl_align: + // + // | capacity | buckets | bytes allocated | bytes per item | + // | -------- | ------- | --------------- | -------------- | + // | 3 | 4 | 36 | (Yikes!) 12.0 | + // | 7 | 8 | 40 | (Poor) 5.7 | + // | 14 | 16 | 48 | 3.4 | + // | 28 | 32 | 80 | 3.3 | + // + // In general, buckets * table_layout.size >= table_layout.ctrl_align + // must be true to avoid these edges. This is implemented by adjusting + // the minimum capacity upwards for small items. This code only needs + // to handle ctrl_align which are less than or equal to Group::WIDTH, + // because valid layout sizes are always a multiple of the alignment, + // so anything with alignment over the Group::WIDTH won't hit this edge + // case. + + // This is brittle, e.g. if we ever add 32 byte groups, it will select + // 3 regardless of the table_layout.size. + let min_cap = match (Group::WIDTH, table_layout.size) { + (16, 0..=1) => 14, + (16, 2..=3) | (8, 0..=1) => 7, + _ => 3, + }; + let cap = min_cap.max(cap); + // We don't bother with a table size of 2 buckets since that can only + // hold a single element. Instead, we skip directly to a 4 bucket table + // which can hold 3 elements. + let buckets = if cap < 4 { + 4 + } else if cap < 8 { + 8 + } else { + 16 + }; + ensure_bucket_bytes_at_least_ctrl_align(table_layout, buckets); + return Some(buckets); + } + + // Otherwise require 1/8 buckets to be empty (87.5% load) + // + // Be careful when modifying this, calculate_layout relies on the + // overflow check here. + let adjusted_cap = cap.checked_mul(8)? / 7; + + // Any overflows will have been caught by the checked_mul. Also, any + // rounding errors from the division above will be cleaned up by + // next_power_of_two (which can't overflow because of the previous division). 
+ let buckets = adjusted_cap.next_power_of_two(); + ensure_bucket_bytes_at_least_ctrl_align(table_layout, buckets); + Some(buckets) +} + +// `maximum_buckets_in` relies on the property that for non-ZST `T`, any +// chosen `buckets` will satisfy `buckets * table_layout.size >= +// table_layout.ctrl_align`, so `calculate_layout_for` does not need to add +// extra padding beyond `table_layout.size * buckets`. If small-table bucket +// selection or growth policy changes, revisit `maximum_buckets_in`. +#[inline] +fn ensure_bucket_bytes_at_least_ctrl_align(table_layout: TableLayout, buckets: usize) { + if table_layout.size != 0 { + let prod = table_layout.size.saturating_mul(buckets); + debug_assert!(prod >= table_layout.ctrl_align); + } +} + +/// Returns the maximum effective capacity for the given bucket mask, taking +/// the maximum load factor into account. +#[inline] +fn bucket_mask_to_capacity(bucket_mask: usize) -> usize { + if bucket_mask < 8 { + // For tables with 1/2/4/8 buckets, we always reserve one empty slot. + // Keep in mind that the bucket mask is one less than the bucket count. + bucket_mask + } else { + // For larger tables we reserve 12.5% of the slots as empty. + ((bucket_mask + 1) / 8) * 7 + } +} + +/// Helper which allows the max calculation for `ctrl_align` to be statically computed for each `T` +/// while keeping the rest of `calculate_layout_for` independent of `T` +#[derive(Copy, Clone)] +struct TableLayout { + size: usize, + ctrl_align: usize, +} + +impl TableLayout { + #[inline] + const fn new() -> Self { + let layout = Layout::new::(); + Self { + size: layout.size(), + ctrl_align: if layout.align() > Group::WIDTH { + layout.align() + } else { + Group::WIDTH + }, + } + } + + #[inline] + fn calculate_layout_for(self, buckets: usize) -> Option<(Layout, usize)> { + debug_assert!(buckets.is_power_of_two()); + + let TableLayout { size, ctrl_align } = self; + // Manual layout calculation since Layout methods are not yet stable. 
+ let ctrl_offset = + size.checked_mul(buckets)?.checked_add(ctrl_align - 1)? & !(ctrl_align - 1); + let len = ctrl_offset.checked_add(buckets + Group::WIDTH)?; + + // We need an additional check to ensure that the allocation doesn't + // exceed `isize::MAX` (https://github.com/rust-lang/rust/pull/95295). + if len > isize::MAX as usize - (ctrl_align - 1) { + return None; + } + + Some(( + unsafe { Layout::from_size_align_unchecked(len, ctrl_align) }, + ctrl_offset, + )) + } +} + +/// A reference to a hash table bucket containing a `T`. +/// +/// This is usually just a pointer to the element itself. However if the element +/// is a ZST, then we instead track the index of the element in the table so +/// that `erase` works properly. +pub struct Bucket { + // Actually it is pointer to next element than element itself + // this is needed to maintain pointer arithmetic invariants + // keeping direct pointer to element introduces difficulty. + // Using `NonNull` for variance and niche layout + ptr: NonNull, +} + +// This Send impl is needed for rayon support. This is safe since Bucket is +// never exposed in a public API. +unsafe impl Send for Bucket {} + +impl Clone for Bucket { + #[inline] + fn clone(&self) -> Self { + Self { ptr: self.ptr } + } +} + +impl Bucket { + /// Creates a [`Bucket`] that contain pointer to the data. + /// The pointer calculation is performed by calculating the + /// offset from given `base` pointer (convenience for + /// `base.as_ptr().sub(index)`). + /// + /// `index` is in units of `T`; e.g., an `index` of 3 represents a pointer + /// offset of `3 * size_of::()` bytes. 
+ /// + /// If the `T` is a ZST, then we instead track the index of the element + /// in the table so that `erase` works properly (return + /// `NonNull::new_unchecked((index + 1) as *mut T)`) + /// + /// # Safety + /// + /// If `mem::size_of::() != 0`, then the safety rules are directly derived + /// from the safety rules for [`<*mut T>::sub`] method of `*mut T` and the safety + /// rules of [`NonNull::new_unchecked`] function. + /// + /// Thus, in order to uphold the safety contracts for the [`<*mut T>::sub`] method + /// and [`NonNull::new_unchecked`] function, as well as for the correct + /// logic of the work of this crate, the following rules are necessary and + /// sufficient: + /// + /// * the `base` pointer must not be `dangling` and must points to the + /// end of the first `value element` from the `data part` of the table, i.e. + /// must be the pointer that returned by [`RawTable::data_end`] or by + /// [`RawTableInner::data_end`]; + /// + /// * `index` must not be greater than `RawTableInner.bucket_mask`, i.e. + /// `index <= RawTableInner.bucket_mask` or, in other words, `(index + 1)` + /// must be no greater than the number returned by the function + /// [`RawTable::num_buckets`] or [`RawTableInner::num_buckets`]. + /// + /// If `mem::size_of::() == 0`, then the only requirement is that the + /// `index` must not be greater than `RawTableInner.bucket_mask`, i.e. + /// `index <= RawTableInner.bucket_mask` or, in other words, `(index + 1)` + /// must be no greater than the number returned by the function + /// [`RawTable::num_buckets`] or [`RawTableInner::num_buckets`]. + #[inline] + unsafe fn from_base_index(base: NonNull, index: usize) -> Self { + // If mem::size_of::() != 0 then return a pointer to an `element` in + // the data part of the table (we start counting from "0", so that + // in the expression T[last], the "last" index actually one less than the + // "buckets" number in the table, i.e. 
"last = RawTableInner.bucket_mask"): + // + // `from_base_index(base, 1).as_ptr()` returns a pointer that + // points here in the data part of the table + // (to the start of T1) + // | + // | `base: NonNull` must point here + // | (to the end of T0 or to the start of C0) + // v v + // [Padding], Tlast, ..., |T1|, T0, |C0, C1, ..., Clast + // ^ + // `from_base_index(base, 1)` returns a pointer + // that points here in the data part of the table + // (to the end of T1) + // + // where: T0...Tlast - our stored data; C0...Clast - control bytes + // or metadata for data. + let ptr = if T::IS_ZERO_SIZED { + // won't overflow because index must be less than length (bucket_mask) + // and bucket_mask is guaranteed to be less than `isize::MAX` + // (see TableLayout::calculate_layout_for method) + invalid_mut(index + 1) + } else { + unsafe { base.as_ptr().sub(index) } + }; + Self { + ptr: unsafe { NonNull::new_unchecked(ptr) }, + } + } + + /// Calculates the index of a [`Bucket`] as distance between two pointers + /// (convenience for `base.as_ptr().offset_from(self.ptr.as_ptr()) as usize`). + /// The returned value is in units of T: the distance in bytes divided by + /// [`core::mem::size_of::()`]. + /// + /// If the `T` is a ZST, then we return the index of the element in + /// the table so that `erase` works properly (return `self.ptr.as_ptr() as usize - 1`). + /// + /// This function is the inverse of [`from_base_index`]. + /// + /// # Safety + /// + /// If `mem::size_of::() != 0`, then the safety rules are directly derived + /// from the safety rules for [`<*const T>::offset_from`] method of `*const T`. 
+ /// + /// Thus, in order to uphold the safety contracts for [`<*const T>::offset_from`] + /// method, as well as for the correct logic of the work of this crate, the + /// following rules are necessary and sufficient: + /// + /// * `base` contained pointer must not be `dangling` and must point to the + /// end of the first `element` from the `data part` of the table, i.e. + /// must be a pointer that returns by [`RawTable::data_end`] or by + /// [`RawTableInner::data_end`]; + /// + /// * `self` also must not contain dangling pointer; + /// + /// * both `self` and `base` must be created from the same [`RawTable`] + /// (or [`RawTableInner`]). + /// + /// If `mem::size_of::() == 0`, this function is always safe. + #[inline] + unsafe fn to_base_index(&self, base: NonNull) -> usize { + // If mem::size_of::() != 0 then return an index under which we used to store the + // `element` in the data part of the table (we start counting from "0", so + // that in the expression T[last], the "last" index actually is one less than the + // "buckets" number in the table, i.e. "last = RawTableInner.bucket_mask"). + // For example for 5th element in table calculation is performed like this: + // + // mem::size_of::() + // | + // | `self = from_base_index(base, 5)` that returns pointer + // | that points here in the data part of the table + // | (to the end of T5) + // | | `base: NonNull` must point here + // v | (to the end of T0 or to the start of C0) + // /???\ v v + // [Padding], Tlast, ..., |T10|, ..., T5|, T4, T3, T2, T1, T0, |C0, C1, C2, C3, C4, C5, ..., C10, ..., Clast + // \__________ __________/ + // \/ + // `bucket.to_base_index(base)` = 5 + // (base.as_ptr() as usize - self.ptr.as_ptr() as usize) / mem::size_of::() + // + // where: T0...Tlast - our stored data; C0...Clast - control bytes or metadata for data. 
+ if T::IS_ZERO_SIZED { + // this can not be UB + self.ptr.as_ptr() as usize - 1 + } else { + unsafe { offset_from(base.as_ptr(), self.ptr.as_ptr()) } + } + } + + /// Acquires the underlying raw pointer `*mut T` to `data`. + /// + /// # Note + /// + /// If `T` is not [`Copy`], do not use `*mut T` methods that can cause calling the + /// destructor of `T` (for example the [`<*mut T>::drop_in_place`] method), because + /// for properly dropping the data we also need to clear `data` control bytes. If we + /// drop data, but do not clear `data control byte` it leads to double drop when + /// [`RawTable`] goes out of scope. + /// + /// If you modify an already initialized `value`, so [`Hash`] and [`Eq`] on the new + /// `T` value and its borrowed form *must* match those for the old `T` value, as the map + /// will not re-evaluate where the new value should go, meaning the value may become + /// "lost" if their location does not reflect their state. + #[inline] + pub fn as_ptr(&self) -> *mut T { + if T::IS_ZERO_SIZED { + // Just return an arbitrary ZST pointer which is properly aligned + // invalid pointer is good enough for ZST + invalid_mut(mem::align_of::()) + } else { + unsafe { self.ptr.as_ptr().sub(1) } + } + } + + /// Acquires the underlying non-null pointer `*mut T` to `data`. + #[inline] + fn as_non_null(&self) -> NonNull { + // SAFETY: `self.ptr` is already a `NonNull` + unsafe { NonNull::new_unchecked(self.as_ptr()) } + } + + /// Create a new [`Bucket`] that is offset from the `self` by the given + /// `offset`. The pointer calculation is performed by calculating the + /// offset from `self` pointer (convenience for `self.ptr.as_ptr().sub(offset)`). + /// This function is used for iterators. + /// + /// `offset` is in units of `T`; e.g., a `offset` of 3 represents a pointer + /// offset of `3 * size_of::()` bytes. 
+    ///
+    /// # Safety
+    ///
+    /// If `mem::size_of::<T>() != 0`, then the safety rules are directly derived
+    /// from the safety rules for [`<*mut T>::sub`] method of `*mut T` and safety
+    /// rules of [`NonNull::new_unchecked`] function.
+    ///
+    /// Thus, in order to uphold the safety contracts for [`<*mut T>::sub`] method
+    /// and [`NonNull::new_unchecked`] function, as well as for the correct
+    /// logic of the work of this crate, the following rules are necessary and
+    /// sufficient:
+    ///
+    /// * `self` contained pointer must not be `dangling`;
+    ///
+    /// * `self.to_base_index() + offset` must not be greater than `RawTableInner.bucket_mask`,
+    ///   i.e. `(self.to_base_index() + offset) <= RawTableInner.bucket_mask` or, in other
+    ///   words, `self.to_base_index() + offset + 1` must be no greater than the number returned
+    ///   by the function [`RawTable::num_buckets`] or [`RawTableInner::num_buckets`].
+    ///
+    /// If `mem::size_of::<T>() == 0`, then the only requirement is that the
+    /// `self.to_base_index() + offset` must not be greater than `RawTableInner.bucket_mask`,
+    /// i.e. `(self.to_base_index() + offset) <= RawTableInner.bucket_mask` or, in other words,
+    /// `self.to_base_index() + offset + 1` must be no greater than the number returned by the
+    /// function [`RawTable::num_buckets`] or [`RawTableInner::num_buckets`].
+    #[inline]
+    unsafe fn next_n(&self, offset: usize) -> Self {
+        let ptr = if T::IS_ZERO_SIZED {
+            // invalid pointer is good enough for ZST
+            invalid_mut(self.ptr.as_ptr() as usize + offset)
+        } else {
+            unsafe { self.ptr.as_ptr().sub(offset) }
+        };
+        Self {
+            ptr: unsafe { NonNull::new_unchecked(ptr) },
+        }
+    }
+
+    /// Executes the destructor (if any) of the pointed-to `data`.
+    ///
+    /// # Safety
+    ///
+    /// See [`ptr::drop_in_place`] for safety concerns.
+    ///
+    /// You should use [`RawTable::erase`] instead of this function,
+    /// or be careful with calling this function directly, because for
+    /// properly dropping the data we also need to clear `data` control bytes.
+    /// If we drop data, but do not erase `data control byte` it leads to
+    /// double drop when [`RawTable`] goes out of scope.
+    #[cfg_attr(feature = "inline-more", inline)]
+    pub unsafe fn drop(&self) {
+        unsafe {
+            self.as_ptr().drop_in_place();
+        }
+    }
+
+    /// Reads the `value` from `self` without moving it. This leaves the
+    /// memory in `self` unchanged.
+    ///
+    /// # Safety
+    ///
+    /// See [`ptr::read`] for safety concerns.
+    ///
+    /// You should use [`RawTable::remove`] instead of this function,
+    /// or be careful with calling this function directly, because the compiler
+    /// calls its destructor when the read `value` goes out of scope. It
+    /// can cause double dropping when [`RawTable`] goes out of scope,
+    /// because the `data control byte` was not erased.
+    #[inline]
+    pub unsafe fn read(&self) -> T {
+        unsafe { self.as_ptr().read() }
+    }
+
+    /// Overwrites a memory location with the given `value` without reading
+    /// or dropping the old value (like [`ptr::write`] function).
+    ///
+    /// # Safety
+    ///
+    /// See [`ptr::write`] for safety concerns.
+    ///
+    /// # Note
+    ///
+    /// [`Hash`] and [`Eq`] on the new `T` value and its borrowed form *must* match
+    /// those for the old `T` value, as the map will not re-evaluate where the new
+    /// value should go, meaning the value may become "lost" if their location
+    /// does not reflect their state.
+    #[inline]
+    pub unsafe fn write(&self, val: T) {
+        unsafe {
+            self.as_ptr().write(val);
+        }
+    }
+
+    /// Returns a shared immutable reference to the `value`.
+    ///
+    /// # Safety
+    ///
+    /// See [`NonNull::as_ref`] for safety concerns.
+    #[inline]
+    pub unsafe fn as_ref<'a>(&self) -> &'a T {
+        unsafe { &*self.as_ptr() }
+    }
+
+    /// Returns a unique mutable reference to the `value`.
+ /// + /// # Safety + /// + /// See [`NonNull::as_mut`] for safety concerns. + /// + /// # Note + /// + /// [`Hash`] and [`Eq`] on the new `T` value and its borrowed form *must* match + /// those for the old `T` value, as the map will not re-evaluate where the new + /// value should go, meaning the value may become "lost" if their location + /// does not reflect their state. + #[inline] + pub unsafe fn as_mut<'a>(&self) -> &'a mut T { + unsafe { &mut *self.as_ptr() } + } +} + +/// A raw hash table with an unsafe API. +pub struct RawTable { + table: RawTableInner, + alloc: A, + // Tell dropck that we own instances of T. + marker: PhantomData, +} + +/// Non-generic part of `RawTable` which allows functions to be instantiated only once regardless +/// of how many different key-value types are used. +struct RawTableInner { + // Mask to get an index from a hash value. The value is one less than the + // number of buckets in the table. + bucket_mask: usize, + + // [Padding], T_n, ..., T1, T0, C0, C1, ... + // ^ points here + ctrl: NonNull, + + // Number of elements that can be inserted before we need to grow the table + growth_left: usize, + + // Number of elements in the table, only really used by len() + items: usize, +} + +impl RawTable { + /// Creates a new empty hash table without allocating any memory. + /// + /// In effect this returns a table with exactly 1 bucket. However we can + /// leave the data pointer dangling since that bucket is never written to + /// due to our load factor forcing us to always have at least 1 free bucket. + #[inline] + pub const fn new() -> Self { + Self { + table: RawTableInner::NEW, + alloc: Global, + marker: PhantomData, + } + } + + /// Allocates a new hash table with at least enough capacity for inserting + /// the given number of elements without reallocating. 
+    pub fn with_capacity(capacity: usize) -> Self {
+        Self::with_capacity_in(capacity, Global)
+    }
+}
+
+impl<T, A: Allocator> RawTable<T, A> {
+    const TABLE_LAYOUT: TableLayout = TableLayout::new::<T>();
+
+    /// Creates a new empty hash table without allocating any memory, using the
+    /// given allocator.
+    ///
+    /// In effect this returns a table with exactly 1 bucket. However we can
+    /// leave the data pointer dangling since that bucket is never written to
+    /// due to our load factor forcing us to always have at least 1 free bucket.
+    #[inline]
+    pub const fn new_in(alloc: A) -> Self {
+        Self {
+            table: RawTableInner::NEW,
+            alloc,
+            marker: PhantomData,
+        }
+    }
+
+    /// Allocates a new hash table with the given number of buckets.
+    ///
+    /// The control bytes are left uninitialized.
+    #[cfg_attr(feature = "inline-more", inline)]
+    unsafe fn new_uninitialized(
+        alloc: A,
+        buckets: usize,
+        fallibility: Fallibility,
+    ) -> Result<Self, TryReserveError> {
+        debug_assert!(buckets.is_power_of_two());
+
+        Ok(Self {
+            table: unsafe {
+                RawTableInner::new_uninitialized(&alloc, Self::TABLE_LAYOUT, buckets, fallibility)
+            }?,
+            alloc,
+            marker: PhantomData,
+        })
+    }
+
+    /// Allocates a new hash table using the given allocator, with at least enough capacity for
+    /// inserting the given number of elements without reallocating.
+    pub fn with_capacity_in(capacity: usize, alloc: A) -> Self {
+        Self {
+            table: RawTableInner::with_capacity(&alloc, Self::TABLE_LAYOUT, capacity),
+            alloc,
+            marker: PhantomData,
+        }
+    }
+
+    /// Returns a reference to the underlying allocator.
+    #[inline]
+    pub fn allocator(&self) -> &A {
+        &self.alloc
+    }
+
+    /// Returns pointer to one past last `data` element in the table as viewed from
+    /// the start point of the allocation.
+    ///
+    /// The caller must ensure that the `RawTable` outlives the returned [`NonNull`],
+    /// otherwise using it may result in [`undefined behavior`].
+ /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + pub fn data_end(&self) -> NonNull { + // `self.table.ctrl.cast()` returns pointer that + // points here (to the end of `T0`) + // ∨ + // [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, CTa_0, CTa_1, ..., CTa_m + // \________ ________/ + // \/ + // `n = buckets - 1`, i.e. `RawTable::num_buckets() - 1` + // + // where: T0...T_n - our stored data; + // CT0...CT_n - control bytes or metadata for `data`. + // CTa_0...CTa_m - additional control bytes, where `m = Group::WIDTH - 1` (so that the search + // with loading `Group` bytes from the heap works properly, even if the result + // of `h1(hash) & self.bucket_mask` is equal to `self.bucket_mask`). See also + // `RawTableInner::set_ctrl` function. + // + // P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.num_buckets()` because the number + // of buckets is a power of two, and `self.bucket_mask = self.num_buckets() - 1`. + self.table.ctrl.cast() + } + + /// Returns pointer to start of data table. + /// + /// # Safety + /// + /// The result is built with `NonNull::new_unchecked` from `data_end()` moved back by + /// `num_buckets()` elements, so the caller must ensure that this pointer is non-null. + /// Presumably this means the table must be allocated - TODO confirm. + #[inline] + pub unsafe fn data_start(&self) -> NonNull { + unsafe { NonNull::new_unchecked(self.data_end().as_ptr().wrapping_sub(self.num_buckets())) } + } + + /// Returns the total amount of memory allocated internally by the hash + /// table, in bytes. + /// + /// The returned number is informational only. It is intended to be + /// primarily used for memory profiling. + #[inline] + pub fn allocation_size(&self) -> usize { + // SAFETY: We use the same `table_layout` that was used to allocate + // this table. + unsafe { self.table.allocation_size_or_zero(Self::TABLE_LAYOUT) } + } + + /// Returns the index of a bucket from a `Bucket`. + /// + /// # Safety + /// + /// The index is computed by `Bucket::to_base_index` relative to this table's + /// `data_end()`; presumably `bucket` must belong to this table - TODO confirm + /// against `Bucket::to_base_index`. + #[inline] + pub unsafe fn bucket_index(&self, bucket: &Bucket) -> usize { + unsafe { bucket.to_base_index(self.data_end()) } + } + + /// Returns a pointer to an element in the table.
+ /// + /// The caller must ensure that the `RawTable` outlives the returned [`Bucket`], + /// otherwise using it may result in [`undefined behavior`]. + /// + /// # Safety + /// + /// If `mem::size_of::<T>() != 0`, then the caller of this function must observe the + /// following safety rules: + /// + /// * The table must already be allocated; + /// + /// * The `index` must be less than the number returned by the [`RawTable::num_buckets`] + /// function, i.e. `(index + 1) <= self.num_buckets()`. + /// + /// It is safe to call this function with index of zero (`index == 0`) on a table that has + /// not been allocated, but using the returned [`Bucket`] results in [`undefined behavior`]. + /// + /// If `mem::size_of::<T>() == 0`, then the only requirement is that the `index` must + /// be less than the number returned by the [`RawTable::num_buckets`] function, i.e. + /// `(index + 1) <= self.num_buckets()`. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + pub unsafe fn bucket(&self, index: usize) -> Bucket { + // If mem::size_of::<T>() != 0 then return a pointer to the `element` in the `data part` of the table + // (we start counting from "0", so that in the expression T[n], the "n" index is actually one less than + // the "buckets" number of our `RawTable`, i.e. "n = RawTable::num_buckets() - 1"): + // + // `table.bucket(3).as_ptr()` returns a pointer that points here in the `data` + // part of the `RawTable`, i.e.
to the start of T3 (see `Bucket::as_ptr`) + // | + // | `base = self.data_end()` points here + // | (to the start of CT0 or to the end of T0) + // v v + // [Pad], T_n, ..., |T3|, T2, T1, T0, |CT0, CT1, CT2, CT3, ..., CT_n, CTa_0, CTa_1, ..., CTa_m + // ^ \__________ __________/ + // `table.bucket(3)` returns a pointer that points \/ + // here in the `data` part of the `RawTable` (to additional control bytes + // the end of T3) `m = Group::WIDTH - 1` + // + // where: T0...T_n - our stored data; + // CT0...CT_n - control bytes or metadata for `data`; + // CTa_0...CTa_m - additional control bytes (so that the search with loading `Group` bytes from + // the heap works properly, even if the result of `h1(hash) & self.table.bucket_mask` + // is equal to `self.table.bucket_mask`). See also `RawTableInner::set_ctrl` function. + // + // P.S. `h1(hash) & self.table.bucket_mask` is the same as `hash as usize % self.num_buckets()` because the number + // of buckets is a power of two, and `self.table.bucket_mask = self.num_buckets() - 1`. + // Debug builds only: reject tables whose `bucket_mask` is zero (this includes the + // unallocated table created by `RawTableInner::new`) and out-of-range indices. + debug_assert_ne!(self.table.bucket_mask, 0); + debug_assert!(index < self.num_buckets()); + unsafe { Bucket::from_base_index(self.data_end(), index) } + } + + /// Erases an element from the table without dropping it. + /// + /// # Safety + /// + /// The bucket index is derived from `item` via `bucket_index`; presumably `item` + /// must be a bucket of this table - TODO confirm against `RawTableInner::erase`. + #[cfg_attr(feature = "inline-more", inline)] + unsafe fn erase_no_drop(&mut self, item: &Bucket) { + unsafe { + let index = self.bucket_index(item); + self.table.erase(index); + } + } + + /// Erases an element from the table, dropping it in place. + /// + /// # Safety + /// + /// The element behind `item` is dropped in place, so presumably `item` must refer to + /// an occupied bucket of this table - TODO confirm. + #[cfg_attr(feature = "inline-more", inline)] + #[expect(clippy::needless_pass_by_value)] + pub unsafe fn erase(&mut self, item: Bucket) { + unsafe { + // Erase the element from the table first since drop might panic. + self.erase_no_drop(&item); + item.drop(); + } + } + + /// Removes an element from the table, returning it. + /// + /// This also returns an index to the newly free bucket.
+ #[cfg_attr(feature = "inline-more", inline)] + #[expect(clippy::needless_pass_by_value)] + pub unsafe fn remove(&mut self, item: Bucket) -> (T, usize) { + unsafe { + // Erase the slot from the table first, then move the value out of it. + self.erase_no_drop(&item); + (item.read(), self.bucket_index(&item)) + } + } + + /// Removes an element from the table, returning it. + /// + /// This also returns an index to the newly free bucket + /// and the former `Tag` for that bucket. + #[cfg_attr(feature = "inline-more", inline)] + #[expect(clippy::needless_pass_by_value)] + pub unsafe fn remove_tagged(&mut self, item: Bucket) -> (T, usize, Tag) { + unsafe { + let index = self.bucket_index(&item); + // Read the tag before erasing so that the former value can be returned. + let tag = *self.table.ctrl(index); + self.table.erase(index); + (item.read(), index, tag) + } + } + + /// Finds and removes an element from the table, returning it. + /// + /// Returns `Ok(None)` if `find` reports no match; an `Err` produced by `eq` is propagated. + #[cfg_attr(feature = "inline-more", inline)] + pub fn remove_entry( + &mut self, + hash: u64, + eq: impl FnMut(&T) -> Result, + ) -> Result, E> { + // Avoid `Option::map` because it bloats LLVM IR. + match self.find(hash, eq)? { + Some(bucket) => Ok(Some(unsafe { self.remove(bucket).0 })), + None => Ok(None), + } + } + + /// Marks all table buckets as empty without dropping their contents. + #[cfg_attr(feature = "inline-more", inline)] + pub fn clear_no_drop(&mut self) { + self.table.clear_no_drop(); + } + + /// Removes all elements from the table without freeing the backing memory. + #[cfg_attr(feature = "inline-more", inline)] + pub fn clear(&mut self) { + if self.is_empty() { + // Special case empty table to avoid surprising O(capacity) time. + return; + } + // Ensure that the table is reset even if one of the drops panics + let mut self_ = guard(self, |self_| self_.clear_no_drop()); + unsafe { + // SAFETY: ScopeGuard sets to zero the `items` field of the table + // even in case of panic during the dropping of the elements so + // that there will be no double drop of the elements.
+ self_.table.drop_elements::(); + } + } + + /// Shrinks the table to fit `max(self.len(), min_size)` elements. + #[cfg_attr(feature = "inline-more", inline)] + pub fn shrink_to(&mut self, min_size: usize, hasher: impl Fn(&T) -> u64) { + // Calculate the minimal number of elements that we need to reserve + // space for. + let min_size = usize::max(self.table.items, min_size); + if min_size == 0 { + // Nothing has to be kept: swap in the empty table and release the old one. + let mut old_inner = mem::replace(&mut self.table, RawTableInner::NEW); + unsafe { + // SAFETY: + // 1. We call the function only once; + // 2. We know for sure that `alloc` and `table_layout` match the [`Allocator`] + // and [`TableLayout`] that were used to allocate this table. + // 3. If any elements' drop function panics, then there will only be a memory leak, + // because we have replaced the inner table with a new one. + old_inner.drop_inner_table::(&self.alloc, Self::TABLE_LAYOUT); + } + return; + } + + // Calculate the number of buckets that we need for this number of + // elements. If the calculation overflows then the requested bucket + // count must be larger than what we have right now and nothing needs to be + // done. + let Some(min_buckets) = capacity_to_buckets(min_size, Self::TABLE_LAYOUT) else { + return; + }; + + // If we have more buckets than we need, shrink the table. + if min_buckets < self.num_buckets() { + // Fast path if the table is empty + if self.table.items == 0 { + let new_inner = + RawTableInner::with_capacity(&self.alloc, Self::TABLE_LAYOUT, min_size); + let mut old_inner = mem::replace(&mut self.table, new_inner); + unsafe { + // SAFETY: + // 1. We call the function only once; + // 2. We know for sure that `alloc` and `table_layout` match the [`Allocator`] + // and [`TableLayout`] that were used to allocate this table. + // 3. If any elements' drop function panics, then there will only be a memory leak, + // because we have replaced the inner table with a new one.
+ old_inner.drop_inner_table::(&self.alloc, Self::TABLE_LAYOUT); + } + } else { + // SAFETY: + // 1. We know for sure that `min_size >= self.table.items`. + // 2. The [`RawTableInner`] must already have properly initialized control bytes since + // we will never expose RawTable::new_uninitialized in a public API. + let result = unsafe { self.resize(min_size, hasher, Fallibility::Infallible) }; + + // SAFETY: The result of calling the `resize` function cannot be an error + // because `fallibility == Fallibility::Infallible`. + unsafe { result.unwrap_unchecked() }; + } + } + } + + /// Ensures that at least `additional` items can be inserted into the table + /// without reallocation. + #[cfg_attr(feature = "inline-more", inline)] + pub fn reserve(&mut self, additional: usize, hasher: impl Fn(&T) -> u64) { + // NOTE(review): this branch is hinted `likely`, yet `find_or_find_insert_index` calls + // `reserve(1, ..)` before every lookup; confirm that the hint is intended. + if likely(additional > self.table.growth_left) { + // SAFETY: The [`RawTableInner`] must already have properly initialized control + // bytes since we will never expose RawTable::new_uninitialized in a public API. + let result = + unsafe { self.reserve_rehash(additional, hasher, Fallibility::Infallible) }; + + // SAFETY: All allocation errors will be caught inside `RawTableInner::reserve_rehash`. + unsafe { result.unwrap_unchecked() }; + } + } + + /// Tries to ensure that at least `additional` items can be inserted into + /// the table without reallocation. + /// + /// Returns `Ok(())` without doing anything when `additional` already fits into the + /// remaining growth budget; otherwise the result of the fallible slow path is returned. + #[cfg_attr(feature = "inline-more", inline)] + pub fn try_reserve( + &mut self, + additional: usize, + hasher: impl Fn(&T) -> u64, + ) -> Result<(), TryReserveError> { + if additional > self.table.growth_left { + // SAFETY: The [`RawTableInner`] must already have properly initialized control + // bytes since we will never expose RawTable::new_uninitialized in a public API. + unsafe { self.reserve_rehash(additional, hasher, Fallibility::Fallible) } + } else { + Ok(()) + } + } + + /// Out-of-line slow path for `reserve` and `try_reserve`.
+ /// + /// # Safety + /// + /// The [`RawTableInner`] must have properly initialized control bytes, + /// otherwise calling this function results in [`undefined behavior`]. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[cold] + #[inline(never)] + unsafe fn reserve_rehash( + &mut self, + additional: usize, + hasher: impl Fn(&T) -> u64, + fallibility: Fallibility, + ) -> Result<(), TryReserveError> { + unsafe { + // SAFETY: + // 1. We know for sure that `alloc` and `layout` match the [`Allocator`] and + // [`TableLayout`] that were used to allocate this table. + // 2. The `drop` function is the actual drop function of the elements stored in + // the table. + // 3. The caller ensures that the control bytes of the `RawTableInner` + // are already initialized. + self.table.reserve_rehash_inner( + &self.alloc, + additional, + &|table, index| hasher(table.bucket::(index).as_ref()), + fallibility, + Self::TABLE_LAYOUT, + if T::NEEDS_DROP { + Some(|ptr| ptr::drop_in_place(ptr.cast::())) + } else { + None + }, + ) + } + } + + /// Allocates a new table of a different size and moves the contents of the + /// current table into it. + /// + /// # Safety + /// + /// The [`RawTableInner`] must have properly initialized control bytes, + /// otherwise calling this function results in [`undefined behavior`]. + /// + /// The caller of this function must ensure that `capacity >= self.table.items` + /// otherwise: + /// + /// * If `self.table.items != 0`, calling this function with `capacity` + /// equal to 0 (`capacity == 0`) results in [`undefined behavior`]. + /// + /// * If `self.table.items > capacity_to_buckets(capacity, Self::TABLE_LAYOUT)`, + /// this function will never return (it will loop infinitely). + /// + /// See [`RawTableInner::find_insert_index`] for more information.
+ /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + unsafe fn resize( + &mut self, + capacity: usize, + hasher: impl Fn(&T) -> u64, + fallibility: Fallibility, + ) -> Result<(), TryReserveError> { + // SAFETY: + // 1. The caller of this function guarantees that `capacity >= self.table.items`. + // 2. We know for sure that `alloc` and `layout` match the [`Allocator`] and + // [`TableLayout`] that were used to allocate this table. + // 3. The caller ensures that the control bytes of the `RawTableInner` + // are already initialized. + unsafe { + // The closure gives the non-generic inner code the hash of the element stored at `index`. + self.table.resize_inner( + &self.alloc, + capacity, + &|table, index| hasher(table.bucket::(index).as_ref()), + fallibility, + Self::TABLE_LAYOUT, + ) + } + } + + /// Inserts a new element into the table, and returns its raw bucket. + /// + /// This does not check if the given element already exists in the table. + /// + /// `hasher` is only forwarded to `reserve`, which is called when no growth budget is + /// left and the chosen slot is an empty one. + #[cfg_attr(feature = "inline-more", inline)] + pub fn insert(&mut self, hash: u64, value: T, hasher: impl Fn(&T) -> u64) -> Bucket { + unsafe { + // SAFETY: + // 1. The [`RawTableInner`] must already have properly initialized control bytes since + // we will never expose `RawTable::new_uninitialized` in a public API. + // + // 2. We reserve additional space (if necessary) right after calling this function. + let mut index = self.table.find_insert_index(hash); + + // We can avoid growing the table once we have reached our load factor if we are replacing + // a tombstone. This works since the number of EMPTY slots does not change in this case. + // + // SAFETY: The function is guaranteed to return an index in the range `0..=self.num_buckets()`. + let old_ctrl = *self.table.ctrl(index); + if unlikely(self.table.growth_left == 0 && old_ctrl.special_is_empty()) { + self.reserve(1, hasher); + // SAFETY: We know for sure that `RawTableInner` has control bytes + // initialized and that there is extra space in the table.
+ index = self.table.find_insert_index(hash); + } + + self.insert_at_index(hash, index, value) + } + } + + /// Inserts a new element into the table, and returns a mutable reference to it. + /// + /// This does not check if the given element already exists in the table. + #[cfg_attr(feature = "inline-more", inline)] + pub fn insert_entry(&mut self, hash: u64, value: T, hasher: impl Fn(&T) -> u64) -> &mut T { + unsafe { self.insert(hash, value, hasher).as_mut() } + } + + /// Inserts a new element into the table, without growing the table. + /// + /// This does not check if the given element already exists in the table. + /// + /// # Safety + /// + /// There must be enough space in the table to insert the new element; this + /// function never reserves additional space. + #[cfg_attr(feature = "inline-more", inline)] + pub unsafe fn insert_no_grow(&mut self, hash: u64, value: T) -> Bucket { + unsafe { + let (index, old_ctrl) = self.table.prepare_insert_index(hash); + let bucket = self.table.bucket(index); + + // If we are replacing a DELETED entry then we don't need to update + // the load counter. + self.table.growth_left -= old_ctrl.special_is_empty() as usize; + + bucket.write(value); + self.table.items += 1; + bucket + } + } + + /// Temporarily removes a bucket, applying the given function to the removed + /// element and optionally putting back the returned value in the same bucket. + /// + /// Returns tag for bucket if the bucket is emptied out. + /// + /// This does not check if the given bucket is actually occupied.
+ #[cfg_attr(feature = "inline-more", inline)] + pub unsafe fn replace_bucket_with(&mut self, bucket: Bucket, f: F) -> Option + where + F: FnOnce(T) -> Option, + { + unsafe { + let index = self.bucket_index(&bucket); + let old_ctrl = *self.table.ctrl(index); + // Occupancy is only verified in debug builds. + debug_assert!(self.is_bucket_full(index)); + let old_growth_left = self.table.growth_left; + // Take the element out through `remove`; its bookkeeping is undone below if `f` + // hands a value back. + let item = self.remove(bucket).0; + if let Some(new_item) = f(item) { + // Restore the growth budget, control byte and item count, then refill the same slot. + self.table.growth_left = old_growth_left; + self.table.set_ctrl(index, old_ctrl); + self.table.items += 1; + self.bucket(index).write(new_item); + None + } else { + Some(old_ctrl) + } + } + } + + /// Searches for an element in the table. If the element is not found, + /// returns `Err` with the position of a slot where an element with the + /// same hash could be inserted. + /// + /// This function may resize the table if additional space is required for + /// inserting an element. + /// + /// An error returned by `eq` is propagated to the caller as the outer `Err`. + #[inline] + pub fn find_or_find_insert_index( + &mut self, + hash: u64, + mut eq: impl FnMut(&T) -> Result, + hasher: impl Fn(&T) -> u64, + ) -> Result, usize>, E> { + // Make room for one element up front, before any index is computed. + self.reserve(1, hasher); + + unsafe { + // SAFETY: + // 1. We know for sure that there is at least one empty `bucket` in the table. + // 2. The [`RawTableInner`] must already have properly initialized control bytes since we will + // never expose `RawTable::new_uninitialized` in a public API. + // 3. The `find_or_find_insert_index_inner` function returns the `index` of only the full bucket, + // which is in the range `0..self.num_buckets()` (since there is at least one empty `bucket` in + // the table), so calling `self.bucket(index)` and `Bucket::as_ref` is safe. + let indexes = self + .table + .find_or_find_insert_index_inner(hash, &mut |index| { + eq(self.bucket(index).as_ref()) + })?; + + match indexes { + // SAFETY: See explanation above.
+ Ok(index) => Ok(Ok(self.bucket(index))), + Err(index) => Ok(Err(index)), + } + } + } + + /// Inserts a new element into the table at the given index with the given hash, + /// and returns its raw bucket. + /// + /// # Safety + /// + /// `index` must point to a slot previously returned by + /// `find_or_find_insert_index`, and no mutation of the table must have + /// occurred since that call. + #[inline] + pub unsafe fn insert_at_index(&mut self, hash: u64, index: usize, value: T) -> Bucket { + unsafe { self.insert_tagged_at_index(Tag::full(hash), index, value) } + } + + /// Inserts a new element into the table at the given index with the given tag, + /// and returns its raw bucket. + /// + /// # Safety + /// + /// `index` must point to a slot previously returned by + /// `find_or_find_insert_index`, and no mutation of the table must have + /// occurred since that call. + #[inline] + pub unsafe fn insert_tagged_at_index(&mut self, tag: Tag, index: usize, value: T) -> Bucket { + unsafe { + // Record the insertion in the table's bookkeeping first, then write the value. + let old_ctrl = *self.table.ctrl(index); + self.table.record_item_insert_at(index, old_ctrl, tag); + + let bucket = self.bucket(index); + bucket.write(value); + bucket + } + } + + /// Searches for an element in the table. + /// + /// Returns the matching bucket, or `None` if `find_inner` reports no match. An error + /// returned by `eq` is propagated to the caller. + #[inline] + pub fn find( + &self, + hash: u64, + mut eq: impl FnMut(&T) -> Result, + ) -> Result>, E> { + unsafe { + // SAFETY: + // 1. The [`RawTableInner`] must already have properly initialized control bytes since we + // will never expose `RawTable::new_uninitialized` in a public API. + // 2. The `find_inner` function returns the `index` of only the full bucket, which is in + // the range `0..self.num_buckets()`, so calling `self.bucket(index)` and `Bucket::as_ref` + // is safe. + let result = self + .table + .find_inner(hash, &mut |index| eq(self.bucket(index).as_ref()))?; + + // Avoid `Option::map` because it bloats LLVM IR. + match result { + // SAFETY: See explanation above.
+ Some(index) => Ok(Some(self.bucket(index))), + None => Ok(None), + } + } + } + + /// Gets a reference to an element in the table. + #[inline] + pub fn get( + &self, + hash: u64, + eq: impl FnMut(&T) -> Result, + ) -> Result, E> { + // Avoid `Option::map` because it bloats LLVM IR. + match self.find(hash, eq)? { + Some(bucket) => Ok(Some(unsafe { bucket.as_ref() })), + None => Ok(None), + } + } + + /// Gets a mutable reference to an element in the table. + /// + /// NOTE(review): this takes `&self` but hands out a mutable reference obtained through + /// `Bucket::as_mut`, so the borrow checker cannot enforce exclusivity here. Callers + /// presumably have to guarantee it themselves - confirm that this is intended. + #[inline] + #[allow(clippy::mut_from_ref)] + pub fn get_mut( + &self, + hash: u64, + eq: impl FnMut(&T) -> Result, + ) -> Result, E> { + // Avoid `Option::map` because it bloats LLVM IR. + match self.find(hash, eq)? { + Some(bucket) => Ok(Some(unsafe { bucket.as_mut() })), + None => Ok(None), + } + } + + /// Gets a reference to an element in the table at the given bucket index. + /// + /// Returns `None` if `index` is out of bounds or the bucket is not full. + #[inline] + pub fn get_bucket(&self, index: usize) -> Option<&T> { + unsafe { + if index < self.num_buckets() && self.is_bucket_full(index) { + Some(self.bucket(index).as_ref()) + } else { + None + } + } + } + + /// Gets a mutable reference to an element in the table at the given bucket index. + /// + /// Returns `None` if `index` is out of bounds or the bucket is not full. + #[inline] + pub fn get_bucket_mut(&mut self, index: usize) -> Option<&mut T> { + unsafe { + if index < self.num_buckets() && self.is_bucket_full(index) { + Some(self.bucket(index).as_mut()) + } else { + None + } + } + } + + /// Returns a pointer to an element in the table, but only after verifying that + /// the index is in-bounds and the bucket is occupied. + #[inline] + pub fn checked_bucket(&self, index: usize) -> Option> { + unsafe { + if index < self.num_buckets() && self.is_bucket_full(index) { + Some(self.bucket(index)) + } else { + None + } + } + } + + /// Attempts to get mutable references to `N` entries in the table at once. + /// + /// Returns an array of length `N` with the results of each query. + /// + /// At most one mutable reference will be returned to any entry. This function panics if two + /// of the lookups resolve to the same entry.
`None` will be returned if the hash is not found. + /// + /// The `eq` argument should be a closure such that `eq(i, k)` returns true if `k` is equal to + /// the `i`th key to be looked up. + /// + /// # Panics + /// + /// Panics with "duplicate keys found" if two of the lookups resolve to the same entry. + pub fn get_disjoint_mut( + &mut self, + hashes: [u64; N], + eq: impl FnMut(usize, &T) -> bool, + ) -> [Option<&'_ mut T>; N] { + unsafe { + let ptrs = self.get_disjoint_mut_pointers(hashes, eq); + + for (i, cur) in ptrs.iter().enumerate() { + if cur.is_some() && ptrs[..i].contains(cur) { + panic!("duplicate keys found"); + } + } + // All buckets are distinct from all previous buckets so we're clear to return the result + // of the lookup. + + ptrs.map(|ptr| ptr.map(|mut ptr| ptr.as_mut())) + } + } + + /// Same lookup as `get_disjoint_mut`, but without the check for duplicate entries. + /// + /// # Safety + /// + /// Every found pointer is turned into a mutable reference without any overlap check, so + /// the caller must ensure that no two lookups resolve to the same entry. + pub unsafe fn get_disjoint_unchecked_mut( + &mut self, + hashes: [u64; N], + eq: impl FnMut(usize, &T) -> bool, + ) -> [Option<&'_ mut T>; N] { + let ptrs = unsafe { self.get_disjoint_mut_pointers(hashes, eq) }; + ptrs.map(|ptr| ptr.map(|mut ptr| unsafe { ptr.as_mut() })) + } + + /// Looks up each of the `N` hashes with `find` and returns the raw element pointers, + /// with `None` for lookups that found nothing. + /// + /// The `unwrap` cannot fail because the closure handed to `find` always returns `Ok`. + unsafe fn get_disjoint_mut_pointers( + &mut self, + hashes: [u64; N], + mut eq: impl FnMut(usize, &T) -> bool, + ) -> [Option>; N] { + array::from_fn(|i| { + self.find(hashes[i], |k| Ok::<_, ()>(eq(i, k))) + .unwrap() + .map(|cur| cur.as_non_null()) + }) + } + + /// Returns the number of elements the map can hold without reallocating. + /// + /// This number is a lower bound; the table might be able to hold + /// more, but is guaranteed to be able to hold at least this many. + #[inline] + pub fn capacity(&self) -> usize { + self.table.items + self.table.growth_left + } + + /// Returns the number of elements in the table. + #[inline] + pub fn len(&self) -> usize { + self.table.items + } + + /// Returns `true` if the table contains no elements. + #[inline] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Returns the number of buckets in the table. + #[inline] + pub fn num_buckets(&self) -> usize { + self.table.bucket_mask + 1 + } + + /// Checks whether the bucket at `index` is full.
+ /// + /// # Safety + /// + /// The caller must ensure `index` is less than the number of buckets. + #[inline] + pub unsafe fn is_bucket_full(&self, index: usize) -> bool { + unsafe { self.table.is_bucket_full(index) } + } + + /// Returns an iterator over every element in the table. + /// + /// # Safety + /// + /// It is up to the caller to ensure that the `RawTable` outlives the `RawIter`. + /// Because we cannot make the `next` method unsafe on the `RawIter` + /// struct, we have to make the `iter` method unsafe. + #[inline] + pub unsafe fn iter(&self) -> RawIter { + // SAFETY: + // 1. The caller must uphold the safety contract for `iter` method. + // 2. The [`RawTableInner`] must already have properly initialized control bytes since + // we will never expose RawTable::new_uninitialized in a public API. + unsafe { self.table.iter() } + } + + /// Returns an iterator over occupied buckets that could match a given hash. + /// + /// `RawTable` only stores 7 bits of the hash value, so this iterator may + /// return items that have a hash value different than the one provided. You + /// should always validate the returned values before using them. + /// + /// # Safety + /// + /// It is up to the caller to ensure that the `RawTable` outlives the + /// `RawIterHash`. Because we cannot make the `next` method unsafe on the + /// `RawIterHash` struct, we have to make the `iter_hash` method unsafe. + #[cfg_attr(feature = "inline-more", inline)] + pub unsafe fn iter_hash(&self, hash: u64) -> RawIterHash { + unsafe { RawIterHash::new(self, hash) } + } + + /// Returns an iterator over occupied bucket indices that could match a given hash. + /// + /// `RawTable` only stores 7 bits of the hash value, so this iterator may + /// return indices of items that have a hash value different than the one provided. You + /// should always validate the returned values before using them. + /// + /// # Safety + /// + /// It is up to the caller to ensure that the `RawTable` outlives the + /// `RawIterHashIndices`.
Because we cannot make the `next` method unsafe on the + /// `RawIterHashIndices` struct, we have to make the `iter_hash_buckets` method unsafe. + #[cfg_attr(feature = "inline-more", inline)] + pub unsafe fn iter_hash_buckets(&self, hash: u64) -> RawIterHashIndices { + unsafe { RawIterHashIndices::new(&self.table, hash) } + } + + /// Returns an iterator over the indices of the full buckets in the table. + /// + /// See [`RawTableInner::full_buckets_indices`] for safety conditions. + #[inline(always)] + pub unsafe fn full_buckets_indices(&self) -> FullBucketsIndices { + unsafe { self.table.full_buckets_indices() } + } + + /// Returns an iterator which removes all elements from the table without + /// freeing the memory. + #[cfg_attr(feature = "inline-more", inline)] + pub fn drain(&mut self) -> RawDrain<'_, T, A> { + unsafe { + let iter = self.iter(); + self.drain_iter_from(iter) + } + } + + /// Returns an iterator which removes all elements from the table without + /// freeing the memory. + /// + /// Iteration starts at the provided iterator's current location. + /// + /// # Safety + /// + /// It is up to the caller to ensure that the iterator is valid for this + /// `RawTable` and covers all items that remain in the table. + #[cfg_attr(feature = "inline-more", inline)] + pub unsafe fn drain_iter_from(&mut self, iter: RawIter) -> RawDrain<'_, T, A> { + // Only checked in debug builds: the iterator must still cover every remaining item. + debug_assert_eq!(iter.len(), self.len()); + // The inner table is moved into the drain and replaced by the empty table; + // `orig_table` keeps a pointer back to this table's (now empty) inner state. + RawDrain { + iter, + table: mem::replace(&mut self.table, RawTableInner::NEW), + orig_table: NonNull::from(&mut self.table), + marker: PhantomData, + } + } + + /// Returns an iterator which consumes all elements from the table. + /// + /// Iteration starts at the provided iterator's current location. + /// + /// It is up to the caller to ensure that the iterator is valid for this + /// `RawTable` and covers all items that remain in the table.
+ pub unsafe fn into_iter_from(self, iter: RawIter) -> RawIntoIter { + // Only checked in debug builds: the iterator must still cover every remaining item. + debug_assert_eq!(iter.len(), self.len()); + + let allocation = self.into_allocation(); + RawIntoIter { + iter, + allocation, + marker: PhantomData, + } + } + + /// Converts the table into a raw allocation. The contents of the table + /// should be dropped using a `RawIter` before freeing the allocation. + /// + /// Returns `None` when the table is the empty singleton. + #[cfg_attr(feature = "inline-more", inline)] + pub fn into_allocation(self) -> Option<(NonNull, Layout, A)> { + let alloc = if self.table.is_empty_singleton() { + None + } else { + let (layout, ctrl_offset) = { + let option = Self::TABLE_LAYOUT.calculate_layout_for(self.table.num_buckets()); + unsafe { option.unwrap_unchecked() } + }; + // The allocation starts `ctrl_offset` bytes before the control bytes. + Some(( + unsafe { NonNull::new_unchecked(self.table.ctrl.as_ptr().sub(ctrl_offset).cast()) }, + layout, + unsafe { ptr::read(&raw const self.alloc) }, + )) + }; + // `self` is forgotten rather than dropped: the allocator may have been moved out + // with `ptr::read` above and the returned tuple now describes the allocation. + mem::forget(self); + alloc + } +} + +unsafe impl Send for RawTable +where + T: Send, + A: Send, +{ +} +unsafe impl Sync for RawTable +where + T: Sync, + A: Sync, +{ +} + +impl RawTableInner { + /// The empty table, usable in constant contexts. + const NEW: Self = RawTableInner::new(); + + /// Creates a new empty hash table without allocating any memory. + /// + /// In effect this returns a table with exactly 1 bucket. However we can + /// leave the data pointer dangling since that bucket is never accessed + /// due to our load factor forcing us to always have at least 1 free bucket. + #[inline] + const fn new() -> Self { + Self { + // Be careful to cast the entire slice to a raw pointer. + ctrl: unsafe { + NonNull::new_unchecked(Group::static_empty().as_ptr().cast_mut().cast()) + }, + bucket_mask: 0, + items: 0, + growth_left: 0, + } + } +} + +/// Find the previous power of 2. If it's already a power of 2, it's unchanged. +/// Passing zero is undefined behavior.
+pub fn prev_pow2(z: usize) -> usize { + // Bit index of the most significant bit of the integer type. + let shift = mem::size_of::() * 8 - 1; + // `shift - leading_zeros` is the position of the highest set bit of `z`. + 1 << (shift - (z.leading_zeros() as usize)) +} + +/// Finds the largest number of buckets that can fit in `allocation_size` +/// provided the given TableLayout. +/// +/// This relies on some invariants of `capacity_to_buckets`, so only feed in +/// an `allocation_size` calculated from `capacity_to_buckets`. +fn maximum_buckets_in( + allocation_size: usize, + table_layout: TableLayout, + group_width: usize, +) -> usize { + // Given an equation like: + // z >= x * y + x + g + // x can be maximized by doing: + // x = (z - g) / (y + 1) + // If you squint: + // x is the number of buckets + // y is the table_layout.size + // z is the size of the allocation + // g is the group width + // But this is ignoring the padding needed for ctrl_align. + // If we remember these restrictions: + // x is always a power of 2 + // Layout size for T must always be a multiple of the alignment of T + // Then the alignment can be ignored if we add the constraint: + // x * y >= table_layout.ctrl_align + // This is taken care of by `capacity_to_buckets`. + // It may be helpful to understand this if you remember that: + // ctrl_offset = align(x * y, ctrl_align) + let x = (allocation_size - group_width) / (table_layout.size + 1); + prev_pow2(x) +} + +impl RawTableInner { + /// Allocates a new [`RawTableInner`] with the given number of buckets. + /// The control bytes and buckets are left uninitialized. + /// + /// # Safety + /// + /// The caller of this function must ensure that `buckets` is a power of two + /// and also initialize all control bytes of the length `self.bucket_mask + 1 + + /// Group::WIDTH` with the [`Tag::EMPTY`] bytes. + /// + /// See also [`Allocator`] API for other safety concerns.
+ /// + /// [`Allocator`]: stdalloc::alloc::Allocator + #[cfg_attr(feature = "inline-more", inline)] + unsafe fn new_uninitialized( + alloc: &A, + table_layout: TableLayout, + mut buckets: usize, + fallibility: Fallibility, + ) -> Result + where + A: Allocator, + { + debug_assert!(buckets.is_power_of_two()); + + // Avoid `Option::ok_or_else` because it bloats LLVM IR. + let Some((layout, mut ctrl_offset)) = table_layout.calculate_layout_for(buckets) else { + return Err(fallibility.capacity_overflow()); + }; + + let ptr: NonNull = match do_alloc(alloc, layout) { + Ok(block) => { + // The allocator can't return a block smaller than was + // requested, so `!=` here is equivalent to `>`. + if block.len() != layout.size() { + // Utilize over-sized allocations. + let x = maximum_buckets_in(block.len(), table_layout, Group::WIDTH); + debug_assert!(x >= buckets); + // Calculate the new ctrl_offset. + let (oversized_layout, oversized_ctrl_offset) = { + let option = table_layout.calculate_layout_for(x); + unsafe { option.unwrap_unchecked() } + }; + debug_assert!(oversized_layout.size() <= block.len()); + debug_assert!(oversized_ctrl_offset >= ctrl_offset); + ctrl_offset = oversized_ctrl_offset; + buckets = x; + } + + block.cast() + } + Err(_) => return Err(fallibility.alloc_err(layout)), + }; + + // SAFETY: `ptr` comes from a successful allocation (failures returned above), + // so it is non-null. + let ctrl = unsafe { NonNull::new_unchecked(ptr.as_ptr().add(ctrl_offset)) }; + Ok(Self { + ctrl, + bucket_mask: buckets - 1, + items: 0, + growth_left: bucket_mask_to_capacity(buckets - 1), + }) + } + + /// Attempts to allocate a new [`RawTableInner`] with at least enough + /// capacity for inserting the given number of elements without reallocating. + /// + /// All the control bytes are initialized with the [`Tag::EMPTY`] bytes.
+ #[inline] + fn fallible_with_capacity( + alloc: &A, + table_layout: TableLayout, + capacity: usize, + fallibility: Fallibility, + ) -> Result + where + A: Allocator, + { + if capacity == 0 { + // A zero capacity returns the `Self::NEW` constant without calling the allocator. + Ok(Self::NEW) + } else { + // SAFETY: We checked that we could successfully allocate the new table, and then + // initialized all control bytes with the constant `Tag::EMPTY` byte. + unsafe { + let buckets = capacity_to_buckets(capacity, table_layout) + .ok_or_else(|| fallibility.capacity_overflow())?; + + let mut result = + Self::new_uninitialized(alloc, table_layout, buckets, fallibility)?; + // SAFETY: We checked that the table is allocated and therefore the table already has + // `self.bucket_mask + 1 + Group::WIDTH` number of control bytes (see TableLayout::calculate_layout_for) + // so writing `self.num_ctrl_bytes() == bucket_mask + 1 + Group::WIDTH` bytes is safe. + result.ctrl_slice().fill_empty(); + + Ok(result) + } + } + } + + /// Allocates a new [`RawTableInner`] with at least enough capacity for inserting + /// the given number of elements without reallocating. + /// + /// Panics if the new capacity exceeds [`isize::MAX`] bytes and [`abort`]s the program + /// in case of allocation error. Use [`fallible_with_capacity`] instead if you want to + /// handle memory allocation failure. + /// + /// This is a thin wrapper around [`fallible_with_capacity`] called with + /// `Fallibility::Infallible`. + /// + /// All the control bytes are initialized with the [`Tag::EMPTY`] bytes. + /// + /// [`fallible_with_capacity`]: RawTableInner::fallible_with_capacity + /// [`abort`]: stdalloc::abort::handle_alloc_error + fn with_capacity(alloc: &A, table_layout: TableLayout, capacity: usize) -> Self + where + A: Allocator, + { + let result = + Self::fallible_with_capacity(alloc, table_layout, capacity, Fallibility::Infallible); + + // SAFETY: All allocation errors will be caught inside `RawTableInner::new_uninitialized`. + // NOTE(review): this relies on `Fallibility::Infallible` never producing an `Err` value + // (its error constructors are presumably diverging) — confirm against `Fallibility`. + unsafe { result.unwrap_unchecked() } + } + + /// Fixes up an insertion index returned by the [`RawTableInner::find_insert_index_in_group`] method. 
+ /// + /// In tables smaller than the group width (`self.num_buckets() < Group::WIDTH`), trailing control + /// bytes outside the range of the table are filled with [`Tag::EMPTY`] entries. These will unfortunately + /// trigger a match in the [`RawTableInner::find_insert_index_in_group`] function. This is because + /// the `Some(bit)` returned by `group.match_empty_or_deleted().lowest_set_bit()` after masking + /// (`(probe_seq.pos + bit) & self.bucket_mask`) may point to a full bucket that is already occupied. + /// We detect this situation here and perform a second scan starting at the beginning of the table. + /// This second scan is guaranteed to find an empty slot (due to the load factor) before hitting the + /// trailing control bytes (containing [`Tag::EMPTY`] bytes). + /// + /// If this function is called correctly, it is guaranteed to return an index of an empty or + /// deleted bucket in the range `0..self.num_buckets()` (see `Warning` and `Safety`). + /// + /// # Warning + /// + /// The table must have at least 1 empty or deleted `bucket`, otherwise if the table is less than + /// the group width (`self.num_buckets() < Group::WIDTH`) this function returns an index outside of the + /// table indices range `0..self.num_buckets()` (`0..=self.bucket_mask`). Attempt to write data at that + /// index will cause immediate [`undefined behavior`]. + /// + /// # Safety + /// + /// The safety rules are directly derived from the safety rules for [`RawTableInner::ctrl`] method. + /// Thus, in order to uphold those safety contracts, as well as for the correct logic of the work + /// of this crate, the following rules are necessary and sufficient: + /// + /// * The [`RawTableInner`] must have properly initialized control bytes otherwise calling this + /// function results in [`undefined behavior`]. 
+ /// + /// * This function must only be used on insertion indices found by [`RawTableInner::find_insert_index_in_group`] + /// (after the `find_insert_index_in_group` function, but before insertion into the table). + /// + /// * The `index` must not be greater than the `self.bucket_mask`, i.e. `(index + 1) <= self.num_buckets()` + /// (this one is provided by the [`RawTableInner::find_insert_index_in_group`] function). + /// + /// Calling this function with an index not provided by [`RawTableInner::find_insert_index_in_group`] + /// may result in [`undefined behavior`] even if the index satisfies the safety rules of the + /// [`RawTableInner::ctrl`] function (`index < self.bucket_mask + 1 + Group::WIDTH`). + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + unsafe fn fix_insert_index(&self, mut index: usize) -> usize { + // SAFETY: The caller of this function ensures that `index` is in the range `0..=self.bucket_mask`. + if unlikely(unsafe { self.is_bucket_full(index) }) { + debug_assert!(self.bucket_mask < Group::WIDTH); + // SAFETY: + // + // * Since the caller of this function ensures that the control bytes are properly + // initialized and `ptr = self.ctrl(0)` points to the start of the array of control + // bytes, therefore: `ctrl` is valid for reads, properly aligned to `Group::WIDTH` + // and points to the properly initialized control bytes (see also + // `TableLayout::calculate_layout_for` and `ptr::read`); + // + // * Because the caller of this function ensures that the index was provided by the + // `self.find_insert_index_in_group()` function, so for tables larger than the + // group width (self.num_buckets() >= Group::WIDTH), we will never end up in the given + // branch, since `(probe_seq.pos + bit) & self.bucket_mask` in `find_insert_index_in_group` + // cannot return a full bucket index. 
For tables smaller than the group width, calling + // the `unwrap_unchecked` function is also safe, as the trailing control bytes outside + // the range of the table are filled with EMPTY bytes (and we know for sure that there + // is at least one FULL bucket), so this second scan either finds an empty slot (due to + // the load factor) or hits the trailing control bytes (containing EMPTY). + index = unsafe { + Group::load_aligned(self.ctrl(0)) + .match_empty_or_deleted() + .lowest_set_bit() + .unwrap_unchecked() + }; + } + index + } + + /// Finds the position to insert something in a group. + /// + /// **This may have false positives and must be fixed up with `fix_insert_index` + /// before it's used.** + /// + /// The function is guaranteed to return the index of an empty or deleted [`Bucket`] + /// in the range `0..self.num_buckets()` (`0..=self.bucket_mask`). + #[inline] + fn find_insert_index_in_group(&self, group: &Group, probe_seq: &ProbeSeq) -> Option { + let bit = group.match_empty_or_deleted().lowest_set_bit(); + + if likely(bit.is_some()) { + // This is the same as `(probe_seq.pos + bit) % self.num_buckets()` because the number + // of buckets is a power of two, and `self.bucket_mask = self.num_buckets() - 1`. + Some((probe_seq.pos + bit.unwrap()) & self.bucket_mask) + } else { + None + } + } + + /// Searches for an element in the table, or a potential slot where that element could + /// be inserted (an empty or deleted [`Bucket`] index). + /// + /// This uses dynamic dispatch to reduce the amount of code generated, but that is + /// eliminated by LLVM optimizations. + /// + /// This function does not make any changes to the `data` part of the table, or any + /// changes to the `items` or `growth_left` field of the table. 
+ /// + /// The table must have at least 1 empty or deleted `bucket`, otherwise, if the + /// fallible `eq` callback never returns `Ok(true)` or an error, this function + /// will never return (will go into an infinite loop) for tables larger than the group + /// width, or return an index outside of the table indices range if the table is less + /// than the group width. + /// + /// This function is guaranteed to provide the `eq` callback + /// with only `FULL` buckets' indices and return the `index` of the found + /// element (as `Ok(Ok(index))`). If the element is not found and there is at least 1 + /// empty or deleted [`Bucket`] in the table, the function is guaranteed to return + /// an index in the range `0..self.num_buckets()`, but in any case, if this function + /// returns `Ok(Err(index))`, `index` will be in the range `0..=self.num_buckets()`. + /// + /// An error returned by `eq` is propagated to the caller through the `?` operator + /// and ends the search. + /// + /// # Safety + /// + /// The [`RawTableInner`] must have properly initialized control bytes otherwise calling + /// this function results in [`undefined behavior`]. + /// + /// Attempt to write data at the index returned by this function when the table is less than + /// the group width and if there was not at least one empty or deleted bucket in the table + /// will cause immediate [`undefined behavior`]. This is because in this case the function + /// will return `self.bucket_mask + 1` as an index due to the trailing [`Tag::EMPTY`] control + /// bytes outside the table range. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + unsafe fn find_or_find_insert_index_inner( + &self, + hash: u64, + eq: &mut dyn FnMut(usize) -> Result, + ) -> Result, E> { + let mut insert_index = None; + + let tag_hash = Tag::full(hash); + let mut probe_seq = self.probe_seq(hash); + + loop { + // SAFETY: + // * Caller of this function ensures that the control bytes are properly initialized. 
+ // + // * `ProbeSeq.pos` cannot be greater than `self.bucket_mask = self.num_buckets() - 1` + // of the table due to masking with `self.bucket_mask` and also because the number + // of buckets is a power of two (see `self.probe_seq` function). + // + // * Even if `ProbeSeq.pos` returns `position == self.bucket_mask`, it is safe to + // call `Group::load` due to the extended control bytes range, which is + // `self.bucket_mask + 1 + Group::WIDTH` (in fact, this means that the last control + // byte will never be read for the allocated table); + // + // * Also, even if `RawTableInner` is not already allocated, `ProbeSeq.pos` will + // always return "0" (zero), so Group::load will read unaligned `Group::static_empty()` + // bytes, which is safe (see RawTableInner::new). + let group = unsafe { Group::load(self.ctrl(probe_seq.pos)) }; + + for bit in group.match_tag(tag_hash) { + let index = (probe_seq.pos + bit) & self.bucket_mask; + + if likely(eq(index)?) { + return Ok(Ok(index)); + } + } + + // We didn't find the element we were looking for in the group, try to get an + // insertion slot from the group if we don't have one yet. + if likely(insert_index.is_none()) { + insert_index = self.find_insert_index_in_group(&group, &probe_seq); + } + + if let Some(insert_index) = insert_index { + // Only stop the search if the group contains at least one empty element. + // Otherwise, the element that we are looking for might be in a following group. + if likely(group.match_empty().any_bit_set()) { + // We must have found an insert slot by now, since the current group contains at + // least one. For tables smaller than the group width, there will still be an + // empty element in the current (and only) group due to the load factor. + unsafe { + // SAFETY: + // * Caller of this function ensures that the control bytes are properly initialized. 
+ // + // * We use this function with the index found by `self.find_insert_index_in_group` + return Ok(Err(self.fix_insert_index(insert_index))); + } + } + } + + probe_seq.move_next(self.bucket_mask); + } + } + + /// Searches for an empty or deleted bucket which is suitable for inserting a new + /// element and sets the hash for that slot. Returns an index of that slot and the + /// old control byte stored in the found index. + /// + /// This function does not check if the given element exists in the table. Also, + /// this function does not check if there is enough space in the table to insert + /// a new element. The caller of the function must make sure that the table has at + /// least 1 empty or deleted `bucket`, otherwise this function will never return + /// (will go into an infinite loop) for tables larger than the group width, or + /// return an index outside of the table indices range if the table is less than + /// the group width. + /// + /// If there is at least 1 empty or deleted `bucket` in the table, the function is + /// guaranteed to return an `index` in the range `0..self.num_buckets()`, but in any case, + /// if this function returns an `index` it will be in the range `0..=self.num_buckets()`. + /// + /// This function does not make any changes to the `data` parts of the table, + /// or any changes to the `items` or `growth_left` field of the table. + /// + /// # Safety + /// + /// The safety rules are directly derived from the safety rules for the + /// [`RawTableInner::set_ctrl_hash`] and [`RawTableInner::find_insert_index`] methods. + /// Thus, in order to uphold the safety contracts for that methods, as well as for + /// the correct logic of the work of this crate, you must observe the following rules + /// when calling this function: + /// + /// * The [`RawTableInner`] has already been allocated and has properly initialized + /// control bytes otherwise calling this function results in [`undefined behavior`]. 
+ /// + /// * The caller of this function must ensure that the "data" parts of the table + /// will have an entry in the returned index (matching the given hash) right + /// after calling this function. + /// + /// Attempt to write data at the `index` returned by this function when the table is + /// less than the group width and if there was not at least one empty or deleted bucket in + /// the table will cause immediate [`undefined behavior`]. This is because in this case the + /// function will return `self.bucket_mask + 1` as an index due to the trailing [`Tag::EMPTY`] + /// control bytes outside the table range. + /// + /// The caller must independently increase the `items` field of the table, and also, + /// if the old control byte was [`Tag::EMPTY`], then decrease the table's `growth_left` + /// field, and do not change it if the old control byte was [`Tag::DELETED`]. + /// + /// See also [`Bucket::as_ptr`] method, for more information about properly removing + /// or saving `element` from / into the [`RawTable`] / [`RawTableInner`]. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + unsafe fn prepare_insert_index(&mut self, hash: u64) -> (usize, Tag) { + unsafe { + // SAFETY: Caller of this function ensures that the control bytes are properly initialized. + let index: usize = self.find_insert_index(hash); + // SAFETY: + // 1. The `find_insert_index` function either returns an `index` less than or + // equal to `self.num_buckets() = self.bucket_mask + 1` of the table, or never + // returns if it cannot find an empty or deleted slot. + // 2. The caller of this function guarantees that the table has already been + // allocated + // The previous control byte is read before it is overwritten so that it can be + // returned to the caller alongside the index. + let old_ctrl = *self.ctrl(index); + self.set_ctrl_hash(index, hash); + (index, old_ctrl) + } + } + + /// Searches for an empty or deleted bucket which is suitable for inserting + /// a new element, returning the `index` for the new [`Bucket`]. 
+ /// + /// This function does not make any changes to the `data` part of the table, or any + /// changes to the `items` or `growth_left` field of the table. + /// + /// The table must have at least 1 empty or deleted `bucket`, otherwise this function + /// will never return (will go into an infinite loop) for tables larger than the group + /// width, or return an index outside of the table indices range if the table is less + /// than the group width. + /// + /// If there is at least 1 empty or deleted `bucket` in the table, the function is + /// guaranteed to return an index in the range `0..self.num_buckets()`, but in any case, + /// the returned index will be in the range `0..=self.num_buckets()`. + /// + /// # Safety + /// + /// The [`RawTableInner`] must have properly initialized control bytes otherwise calling + /// this function results in [`undefined behavior`]. + /// + /// Attempt to write data at the index returned by this function when the table is + /// less than the group width and if there was not at least one empty or deleted bucket in + /// the table will cause immediate [`undefined behavior`]. This is because in this case the + /// function will return `self.bucket_mask + 1` as an index due to the trailing [`Tag::EMPTY`] + /// control bytes outside the table range. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + unsafe fn find_insert_index(&self, hash: u64) -> usize { + let mut probe_seq = self.probe_seq(hash); + loop { + // SAFETY: + // * Caller of this function ensures that the control bytes are properly initialized. + // + // * `ProbeSeq.pos` cannot be greater than `self.bucket_mask = self.num_buckets() - 1` + // of the table due to masking with `self.bucket_mask` and also because the number + // of buckets is a power of two (see `self.probe_seq` function). 
+ // + // * Even if `ProbeSeq.pos` returns `position == self.bucket_mask`, it is safe to + // call `Group::load` due to the extended control bytes range, which is + // `self.bucket_mask + 1 + Group::WIDTH` (in fact, this means that the last control + // byte will never be read for the allocated table); + // + // * Also, even if `RawTableInner` is not already allocated, `ProbeSeq.pos` will + // always return "0" (zero), so Group::load will read unaligned `Group::static_empty()` + // bytes, which is safe (see RawTableInner::new). + let group = unsafe { Group::load(self.ctrl(probe_seq.pos)) }; + + let index = self.find_insert_index_in_group(&group, &probe_seq); + if likely(index.is_some()) { + // SAFETY: + // * Caller of this function ensures that the control bytes are properly initialized. + // + // * We use this function with the slot / index found by `self.find_insert_index_in_group` + unsafe { + return self.fix_insert_index(index.unwrap_unchecked()); + } + } + probe_seq.move_next(self.bucket_mask); + } + } + + /// Searches for an element in a table, returning the `index` of the found element. + /// This uses dynamic dispatch to reduce the amount of code generated, but it is + /// eliminated by LLVM optimizations. + /// + /// This function does not make any changes to the `data` part of the table, or any + /// changes to the `items` or `growth_left` field of the table. + /// + /// The table must have at least 1 empty `bucket`, otherwise, if the + /// fallible `eq` callback never returns `Ok(true)` or an error, + /// this function will also never return (will go into an infinite loop). + /// + /// This function is guaranteed to provide the `eq` callback + /// with only `FULL` buckets' indices and return the `index` of the found + /// element as `Ok(Some(index))`, so the index will always be in the range + /// `0..self.num_buckets()`. `Ok(None)` means that no matching element was found, and an + /// error returned by `eq` is propagated to the caller through the `?` operator. 
+ /// + /// # Safety + /// + /// The [`RawTableInner`] must have properly initialized control bytes otherwise calling + /// this function results in [`undefined behavior`]. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline(always)] + unsafe fn find_inner( + &self, + hash: u64, + eq: &mut dyn FnMut(usize) -> Result, + ) -> Result, E> { + let tag_hash = Tag::full(hash); + let mut probe_seq = self.probe_seq(hash); + + loop { + // SAFETY: + // * Caller of this function ensures that the control bytes are properly initialized. + // + // * `ProbeSeq.pos` cannot be greater than `self.bucket_mask = self.num_buckets() - 1` + // of the table due to masking with `self.bucket_mask`. + // + // * Even if `ProbeSeq.pos` returns `position == self.bucket_mask`, it is safe to + // call `Group::load` due to the extended control bytes range, which is + // `self.bucket_mask + 1 + Group::WIDTH` (in fact, this means that the last control + // byte will never be read for the allocated table); + // + // * Also, even if `RawTableInner` is not already allocated, `ProbeSeq.pos` will + // always return "0" (zero), so Group::load will read unaligned `Group::static_empty()` + // bytes, which is safe (see RawTableInner::new_in). + let group = unsafe { Group::load(self.ctrl(probe_seq.pos)) }; + + for bit in group.match_tag(tag_hash) { + // This is the same as `(probe_seq.pos + bit) % self.num_buckets()` because the number + // of buckets is a power of two, and `self.bucket_mask = self.num_buckets() - 1`. + let index = (probe_seq.pos + bit) & self.bucket_mask; + + if likely(eq(index)?) { + return Ok(Some(index)); + } + } + + // An empty control byte in this group ends the search with "not found". + if likely(group.match_empty().any_bit_set()) { + return Ok(None); + } + + probe_seq.move_next(self.bucket_mask); + } + } + + /// Prepares for rehashing data in place (that is, without allocating new memory). 
+ /// Converts all full index `control bytes` to `Tag::DELETED` and all `Tag::DELETED` control + /// bytes to `Tag::EMPTY`, i.e. performs the following conversion: + /// + /// - `Tag::EMPTY` control bytes -> `Tag::EMPTY`; + /// - `Tag::DELETED` control bytes -> `Tag::EMPTY`; + /// - `FULL` control bytes -> `Tag::DELETED`. + /// + /// This function does not make any changes to the `data` parts of the table, + /// or any changes to the `items` or `growth_left` field of the table. + /// + /// # Safety + /// + /// You must observe the following safety rules when calling this function: + /// + /// * The [`RawTableInner`] has already been allocated; + /// + /// * The caller of this function must convert the `Tag::DELETED` bytes back to `FULL` + /// bytes when re-inserting them into their ideal position (which was impossible + /// to do during the first insert due to tombstones). If the caller does not do + /// this, then calling this function may result in a memory leak. + /// + /// * The [`RawTableInner`] must have properly initialized control bytes otherwise + /// calling this function results in [`undefined behavior`]. + /// + /// Calling this function on a table that has not been allocated results in + /// [`undefined behavior`]. + /// + /// See also [`Bucket::as_ptr`] method, for more information about properly removing + /// or saving `data element` from / into the [`RawTable`] / [`RawTableInner`]. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + unsafe fn prepare_rehash_in_place(&mut self) { + // Bulk convert all full control bytes to DELETED, and all DELETED control bytes to EMPTY. + // This effectively frees up all buckets containing a DELETED entry. + // + // SAFETY: + // 1. `i` is guaranteed to be within bounds since we are iterating from zero to `buckets - 1`; + // 2. 
Even if `i` will be `i == self.bucket_mask`, it is safe to call `Group::load_aligned` + // due to the extended control bytes range, which is `self.bucket_mask + 1 + Group::WIDTH`; + // 3. The caller of this function guarantees that [`RawTableInner`] has already been allocated; + // 4. We can use `Group::load_aligned` and `Group::store_aligned` here since we start from 0 + // and go to the end with a step equal to `Group::WIDTH` (see TableLayout::calculate_layout_for). + unsafe { + for i in (0..self.num_buckets()).step_by(Group::WIDTH) { + let group = Group::load_aligned(self.ctrl(i)); + let group = group.convert_special_to_empty_and_full_to_deleted(); + group.store_aligned(self.ctrl(i)); + } + } + + // Fix up the trailing control bytes. See the comments in set_ctrl + // for the handling of tables smaller than the group width. + if unlikely(self.num_buckets() < Group::WIDTH) { + // SAFETY: We have `self.bucket_mask + 1 + Group::WIDTH` number of control bytes, + // so copying `self.num_buckets() == self.bucket_mask + 1` bytes with offset equal to + // `Group::WIDTH` is safe + unsafe { + self.ctrl(0) + .copy_to(self.ctrl(Group::WIDTH), self.num_buckets()); + } + } else { + // SAFETY: We have `self.bucket_mask + 1 + Group::WIDTH` number of + // control bytes, so copying `Group::WIDTH` bytes with offset equal + // to `self.num_buckets() == self.bucket_mask + 1` is safe + unsafe { + self.ctrl(0) + .copy_to(self.ctrl(self.num_buckets()), Group::WIDTH); + } + } + } + + /// Returns an iterator over every element in the table. + /// + /// # Safety + /// + /// If any of the following conditions are violated, the result + /// is [`undefined behavior`]: + /// + /// * The caller has to ensure that the `RawTableInner` outlives the + /// `RawIter`. Because we cannot make the `next` method unsafe on + /// the `RawIter` struct, we have to make the `iter` method unsafe. + /// + /// * The [`RawTableInner`] must have properly initialized control bytes. 
+ /// + /// The type `T` must be the actual type of the elements stored in the table, + /// otherwise using the returned [`RawIter`] results in [`undefined behavior`]. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + unsafe fn iter(&self) -> RawIter { + // SAFETY: + // 1. Since the caller of this function ensures that the control bytes + // are properly initialized and `self.data_end()` points to the start + // of the array of control bytes, therefore: `ctrl` is valid for reads, + // properly aligned to `Group::WIDTH` and points to the properly initialized + // control bytes. + // 2. `data` bucket index in the table is equal to the `ctrl` index (i.e. + // equal to zero). + // 3. We pass the exact value of buckets of the table to the function. + // + // `ctrl` points here (to the start + // of the first control byte `CT0`) + // ∨ + // [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, CTa_0, CTa_1, ..., CTa_m + // \________ ________/ + // \/ + // `n = buckets - 1`, i.e. `RawTableInner::num_buckets() - 1` + // + // where: T0...T_n - our stored data; + // CT0...CT_n - control bytes or metadata for `data`. + // CTa_0...CTa_m - additional control bytes, where `m = Group::WIDTH - 1` (so that the search + // with loading `Group` bytes from the heap works properly, even if the result + // of `h1(hash) & self.bucket_mask` is equal to `self.bucket_mask`). See also + // `RawTableInner::set_ctrl` function. + // + // P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.num_buckets()` because the number + // of buckets is a power of two, and `self.bucket_mask = self.num_buckets() - 1`. + unsafe { + let data = Bucket::from_base_index(self.data_end(), 0); + RawIter { + // SAFETY: See explanation above + iter: RawIterRange::new(self.ctrl.as_ptr(), data, self.num_buckets()), + items: self.items, + } + } + } + + /// Executes the destructors (if any) of the values stored in the table. 
+ /// + /// # Note + /// + /// This function does not erase the control bytes of the table and does + /// not make any changes to the `items` or `growth_left` fields of the + /// table. If necessary, the caller of this function must manually set + /// up these table fields, for example using the [`clear_no_drop`] function. + /// + /// Be careful during calling this function, because drop function of + /// the elements can panic, and this can leave table in an inconsistent + /// state. + /// + /// # Safety + /// + /// The type `T` must be the actual type of the elements stored in the table, + /// otherwise calling this function may result in [`undefined behavior`]. + /// + /// If `T` is a type that should be dropped and **the table is not empty**, + /// calling this function more than once results in [`undefined behavior`]. + /// + /// If `T` is not [`Copy`], attempting to use values stored in the table after + /// calling this function may result in [`undefined behavior`]. + /// + /// It is safe to call this function on a table that has not been allocated, + /// on a table with uninitialized control bytes, and on a table with no actual + /// data but with `Full` control bytes if `self.items == 0`. + /// + /// See also [`Bucket::drop`] / [`Bucket::as_ptr`] methods, for more information + /// about properly removing or saving `element` from / into the [`RawTable`] / + /// [`RawTableInner`]. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + unsafe fn drop_elements(&mut self) { + // Check that `self.items != 0`. Protects against the possibility + // of creating an iterator on a table with uninitialized control bytes. + if T::NEEDS_DROP && self.items != 0 { + // SAFETY: We know for sure that RawTableInner will outlive the + // returned `RawIter` iterator, and the caller of this function + // must uphold the safety contract for `drop_elements` method. 
+ unsafe { + for item in self.iter::() { + // SAFETY: The caller must uphold the safety contract for + // `drop_elements` method. + item.drop(); + } + } + } + } + + /// Executes the destructors (if any) of the values stored in the table and then + /// deallocates the table. + /// + /// # Note + /// + /// Calling this function automatically makes invalid (dangling) all instances of + /// buckets ([`Bucket`]) and makes invalid (dangling) the `ctrl` field of the table. + /// + /// This function does not make any changes to the `bucket_mask`, `items` or `growth_left` + /// fields of the table. If necessary, the caller of this function must manually set + /// up these table fields. + /// + /// # Safety + /// + /// If any of the following conditions are violated, the result is [`undefined behavior`]: + /// + /// * Calling this function more than once; + /// + /// * The type `T` must be the actual type of the elements stored in the table. + /// + /// * The `alloc` must be the same [`Allocator`] as the `Allocator` that was used + /// to allocate this table. + /// + /// * The `table_layout` must be the same [`TableLayout`] as the `TableLayout` that + /// was used to allocate this table. + /// + /// The caller of this function should pay attention to the possibility of the + /// elements' drop function panicking, because this: + /// + /// * May leave the table in an inconsistent state; + /// + /// * Memory is never deallocated, so a memory leak may occur. + /// + /// Attempt to use the `ctrl` field of the table (dereference) after calling this + /// function results in [`undefined behavior`]. + /// + /// It is safe to call this function on a table that has not been allocated, + /// on a table with uninitialized control bytes, and on a table with no actual + /// data but with `Full` control bytes if `self.items == 0`. + /// + /// See also [`RawTableInner::drop_elements`] or [`RawTableInner::free_buckets`] + /// for more information. 
+ /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + unsafe fn drop_inner_table(&mut self, alloc: &A, table_layout: TableLayout) { + // The empty singleton owns no allocation, so there is nothing to drop or free for it. + if !self.is_empty_singleton() { + // SAFETY: The caller must uphold the safety contract for `drop_inner_table` method. + unsafe { + self.drop_elements::(); + } + // SAFETY: + // 1. We have checked that our table is allocated. + // 2. The caller must uphold the safety contract for `drop_inner_table` method. + unsafe { + self.free_buckets(alloc, table_layout); + } + } + } + + /// Returns a pointer to an element in the table (convenience for + /// `Bucket::from_base_index(self.data_end::(), index)`). + /// + /// The caller must ensure that the `RawTableInner` outlives the returned [`Bucket`], + /// otherwise using it may result in [`undefined behavior`]. + /// + /// # Safety + /// + /// If `mem::size_of::() != 0`, then the safety rules are directly derived from the + /// safety rules of the [`Bucket::from_base_index`] function. Therefore, when calling + /// this function, the following safety rules must be observed: + /// + /// * The table must already be allocated; + /// + /// * The `index` must be less than the number returned by the [`RawTableInner::num_buckets`] + /// function, i.e. `(index + 1) <= self.num_buckets()`. + /// + /// * The type `T` must be the actual type of the elements stored in the table, otherwise + /// using the returned [`Bucket`] may result in [`undefined behavior`]. + /// + /// It is safe to call this function with index of zero (`index == 0`) on a table that has + /// not been allocated, but using the returned [`Bucket`] results in [`undefined behavior`]. + /// NOTE(review): with debug assertions enabled, the `debug_assert_ne!(self.bucket_mask, 0)` + /// in the body fires for a table whose `bucket_mask` is zero — confirm that this statement + /// still holds for such tables. + /// + /// If `mem::size_of::() == 0`, then the only requirement is that the `index` must + /// be less than the number returned by the [`RawTable::num_buckets`] function, i.e. + /// `(index + 1) <= self.num_buckets()`. 
+ /// + /// ```none + /// If mem::size_of::() != 0 then return a pointer to the `element` in the `data part` of the table + /// (we start counting from "0", so that in the expression T[n], the "n" index actually one less than + /// the "buckets" number of our `RawTableInner`, i.e. "n = RawTableInner::num_buckets() - 1"): + /// + /// `table.bucket(3).as_ptr()` returns a pointer that points here in the `data` + /// part of the `RawTableInner`, i.e. to the start of T3 (see [`Bucket::as_ptr`]) + /// | + /// | `base = table.data_end::()` points here + /// | (to the start of CT0 or to the end of T0) + /// v v + /// [Pad], T_n, ..., |T3|, T2, T1, T0, |CT0, CT1, CT2, CT3, ..., CT_n, CTa_0, CTa_1, ..., CTa_m + /// ^ \__________ __________/ + /// `table.bucket(3)` returns a pointer that points \/ + /// here in the `data` part of the `RawTableInner` additional control bytes + /// (to the end of T3) `m = Group::WIDTH - 1` + /// + /// where: T0...T_n - our stored data; + /// CT0...CT_n - control bytes or metadata for `data`; + /// CTa_0...CTa_m - additional control bytes (so that the search with loading `Group` bytes from + /// the heap works properly, even if the result of `h1(hash) & self.bucket_mask` + /// is equal to `self.bucket_mask`). See also `RawTableInner::set_ctrl` function. + /// + /// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.num_buckets()` because the number + /// of buckets is a power of two, and `self.bucket_mask = self.num_buckets() - 1`. + /// ``` + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + unsafe fn bucket(&self, index: usize) -> Bucket { + debug_assert_ne!(self.bucket_mask, 0); + debug_assert!(index < self.num_buckets()); + unsafe { Bucket::from_base_index(self.data_end(), index) } + } + + /// Returns a raw `*mut u8` pointer to the start of the `data` element in the table + /// (convenience for `self.data_end::().as_ptr().sub((index + 1) * size_of)`). 
+ /// + /// The caller must ensure that the `RawTableInner` outlives the returned `*mut u8`, + /// otherwise using it may result in [`undefined behavior`]. + /// + /// # Safety + /// + /// If any of the following conditions are violated, the result is [`undefined behavior`]: + /// + /// * The table must already be allocated; + /// + /// * The `index` must not be greater than the number returned by the [`RawTableInner::num_buckets`] + /// function, i.e. `(index + 1) <= self.num_buckets()`; + /// + /// * The `size_of` must be equal to the size of the elements stored in the table; + /// + /// ```none + /// If mem::size_of::() != 0 then return a pointer to the `element` in the `data part` of the table + /// (we start counting from "0", so that in the expression T[n], the "n" index actually one less than + /// the "buckets" number of our `RawTableInner`, i.e. "n = RawTableInner::num_buckets() - 1"): + /// + /// `table.bucket_ptr(3, mem::size_of::())` returns a pointer that points here in the + /// `data` part of the `RawTableInner`, i.e. to the start of T3 + /// | + /// | `base = table.data_end::()` points here + /// | (to the start of CT0 or to the end of T0) + /// v v + /// [Pad], T_n, ..., |T3|, T2, T1, T0, |CT0, CT1, CT2, CT3, ..., CT_n, CTa_0, CTa_1, ..., CTa_m + /// \__________ __________/ + /// \/ + /// additional control bytes + /// `m = Group::WIDTH - 1` + /// + /// where: T0...T_n - our stored data; + /// CT0...CT_n - control bytes or metadata for `data`; + /// CTa_0...CTa_m - additional control bytes (so that the search with loading `Group` bytes from + /// the heap works properly, even if the result of `h1(hash) & self.bucket_mask` + /// is equal to `self.bucket_mask`). See also `RawTableInner::set_ctrl` function. + /// + /// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.num_buckets()` because the number + /// of buckets is a power of two, and `self.bucket_mask = self.num_buckets() - 1`. 
+ /// ``` + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + unsafe fn bucket_ptr(&self, index: usize, size_of: usize) -> *mut u8 { + debug_assert_ne!(self.bucket_mask, 0); + debug_assert!(index < self.num_buckets()); + unsafe { + let base: *mut u8 = self.data_end().as_ptr(); + base.sub((index + 1) * size_of) + } + } + + /// Returns pointer to one past last `data` element in the table as viewed from + /// the start point of the allocation (convenience for `self.ctrl.cast()`). + /// + /// This function actually returns a pointer to the end of the `data element` at + /// index "0" (zero). + /// + /// The caller must ensure that the `RawTableInner` outlives the returned [`NonNull`], + /// otherwise using it may result in [`undefined behavior`]. + /// + /// # Note + /// + /// The type `T` must be the actual type of the elements stored in the table, otherwise + /// using the returned [`NonNull`] may result in [`undefined behavior`]. + /// + /// ```none + /// `table.data_end::()` returns pointer that points here + /// (to the end of `T0`) + /// ∨ + /// [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, CTa_0, CTa_1, ..., CTa_m + /// \________ ________/ + /// \/ + /// `n = buckets - 1`, i.e. `RawTableInner::num_buckets() - 1` + /// + /// where: T0...T_n - our stored data; + /// CT0...CT_n - control bytes or metadata for `data`. + /// CTa_0...CTa_m - additional control bytes, where `m = Group::WIDTH - 1` (so that the search + /// with loading `Group` bytes from the heap works properly, even if the result + /// of `h1(hash) & self.bucket_mask` is equal to `self.bucket_mask`). See also + /// `RawTableInner::set_ctrl` function. + /// + /// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.num_buckets()` because the number + /// of buckets is a power of two, and `self.bucket_mask = self.num_buckets() - 1`. 
+ /// ``` + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + fn data_end(&self) -> NonNull { + self.ctrl.cast() + } + + /// Returns an iterator-like object for a probe sequence on the table. + /// + /// This iterator never terminates, but is guaranteed to visit each bucket + /// group exactly once. The loop using `probe_seq` must terminate upon + /// reaching a group containing an empty bucket. + #[inline] + fn probe_seq(&self, hash: u64) -> ProbeSeq { + ProbeSeq { + // This is the same as `hash as usize % self.num_buckets()` because the number + // of buckets is a power of two, and `self.bucket_mask = self.num_buckets() - 1`. + pos: h1(hash) & self.bucket_mask, + stride: 0, + } + } + + #[inline] + unsafe fn record_item_insert_at(&mut self, index: usize, old_ctrl: Tag, new_ctrl: Tag) { + self.growth_left -= usize::from(old_ctrl.special_is_empty()); + unsafe { + self.set_ctrl(index, new_ctrl); + } + self.items += 1; + } + + #[inline] + fn is_in_same_group(&self, i: usize, new_i: usize, hash: u64) -> bool { + let probe_seq_pos = self.probe_seq(hash).pos; + let probe_index = + |pos: usize| (pos.wrapping_sub(probe_seq_pos) & self.bucket_mask) / Group::WIDTH; + probe_index(i) == probe_index(new_i) + } + + /// Sets a control byte to the hash, and possibly also the replicated control byte at + /// the end of the array. + /// + /// This function does not make any changes to the `data` parts of the table, + /// or any changes to the `items` or `growth_left` field of the table. + /// + /// # Safety + /// + /// The safety rules are directly derived from the safety rules for [`RawTableInner::set_ctrl`] + /// method. Thus, in order to uphold the safety contracts for the method, you must observe the + /// following rules when calling this function: + /// + /// * The [`RawTableInner`] has already been allocated; + /// + /// * The `index` must not be greater than the `RawTableInner.bucket_mask`, i.e. 
+ /// `index <= RawTableInner.bucket_mask` or, in other words, `(index + 1)` must + /// be no greater than the number returned by the function [`RawTableInner::num_buckets`]. + /// + /// Calling this function on a table that has not been allocated results in [`undefined behavior`]. + /// + /// See also [`Bucket::as_ptr`] method, for more information about of properly removing + /// or saving `data element` from / into the [`RawTable`] / [`RawTableInner`]. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + unsafe fn set_ctrl_hash(&mut self, index: usize, hash: u64) { + unsafe { + // SAFETY: The caller must uphold the safety rules for the [`RawTableInner::set_ctrl_hash`] + self.set_ctrl(index, Tag::full(hash)); + } + } + + /// Replaces the hash in the control byte at the given index with the provided one, + /// and possibly also replicates the new control byte at the end of the array of control + /// bytes, returning the old control byte. + /// + /// This function does not make any changes to the `data` parts of the table, + /// or any changes to the `items` or `growth_left` field of the table. + /// + /// # Safety + /// + /// The safety rules are directly derived from the safety rules for [`RawTableInner::set_ctrl_hash`] + /// and [`RawTableInner::ctrl`] methods. Thus, in order to uphold the safety contracts for both + /// methods, you must observe the following rules when calling this function: + /// + /// * The [`RawTableInner`] has already been allocated; + /// + /// * The `index` must not be greater than the `RawTableInner.bucket_mask`, i.e. + /// `index <= RawTableInner.bucket_mask` or, in other words, `(index + 1)` must + /// be no greater than the number returned by the function [`RawTableInner::num_buckets`]. + /// + /// Calling this function on a table that has not been allocated results in [`undefined behavior`]. 
+ /// + /// See also [`Bucket::as_ptr`] method, for more information about of properly removing + /// or saving `data element` from / into the [`RawTable`] / [`RawTableInner`]. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + unsafe fn replace_ctrl_hash(&mut self, index: usize, hash: u64) -> Tag { + unsafe { + // SAFETY: The caller must uphold the safety rules for the [`RawTableInner::replace_ctrl_hash`] + let prev_ctrl = *self.ctrl(index); + self.set_ctrl_hash(index, hash); + prev_ctrl + } + } + + /// Sets a control byte, and possibly also the replicated control byte at + /// the end of the array. + /// + /// This function does not make any changes to the `data` parts of the table, + /// or any changes to the `items` or `growth_left` field of the table. + /// + /// # Safety + /// + /// You must observe the following safety rules when calling this function: + /// + /// * The [`RawTableInner`] has already been allocated; + /// + /// * The `index` must not be greater than the `RawTableInner.bucket_mask`, i.e. + /// `index <= RawTableInner.bucket_mask` or, in other words, `(index + 1)` must + /// be no greater than the number returned by the function [`RawTableInner::num_buckets`]. + /// + /// Calling this function on a table that has not been allocated results in [`undefined behavior`]. + /// + /// See also [`Bucket::as_ptr`] method, for more information about of properly removing + /// or saving `data element` from / into the [`RawTable`] / [`RawTableInner`]. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + unsafe fn set_ctrl(&mut self, index: usize, ctrl: Tag) { + // Replicate the first Group::WIDTH control bytes at the end of + // the array without using a branch. 
If the tables smaller than + // the group width (self.num_buckets() < Group::WIDTH), + // `index2 = Group::WIDTH + index`, otherwise `index2` is: + // + // - If index >= Group::WIDTH then index == index2. + // - Otherwise index2 == self.bucket_mask + 1 + index. + // + // The very last replicated control byte is never actually read because + // we mask the initial index for unaligned loads, but we write it + // anyways because it makes the set_ctrl implementation simpler. + // + // If there are fewer buckets than Group::WIDTH then this code will + // replicate the buckets at the end of the trailing group. For example + // with 2 buckets and a group size of 4, the control bytes will look + // like this: + // + // Real | Replicated + // --------------------------------------------- + // | [A] | [B] | [Tag::EMPTY] | [EMPTY] | [A] | [B] | + // --------------------------------------------- + + // This is the same as `(index.wrapping_sub(Group::WIDTH)) % self.num_buckets() + Group::WIDTH` + // because the number of buckets is a power of two, and `self.bucket_mask = self.num_buckets() - 1`. + let index2 = ((index.wrapping_sub(Group::WIDTH)) & self.bucket_mask) + Group::WIDTH; + + // SAFETY: The caller must uphold the safety rules for the [`RawTableInner::set_ctrl`] + unsafe { + *self.ctrl(index) = ctrl; + *self.ctrl(index2) = ctrl; + } + } + + /// Returns a pointer to a control byte. + /// + /// # Safety + /// + /// For the allocated [`RawTableInner`], the result is [`Undefined Behavior`], + /// if the `index` is greater than the `self.bucket_mask + 1 + Group::WIDTH`. + /// In that case, calling this function with `index == self.bucket_mask + 1 + Group::WIDTH` + /// will return a pointer to the end of the allocated table and it is useless on its own. + /// + /// Calling this function with `index >= self.bucket_mask + 1 + Group::WIDTH` on a + /// table that has not been allocated results in [`Undefined Behavior`]. 
+ /// + /// So to satisfy both requirements you should always follow the rule that + /// `index < self.bucket_mask + 1 + Group::WIDTH` + /// + /// Calling this function on [`RawTableInner`] that are not already allocated is safe + /// for read-only purpose. + /// + /// See also [`Bucket::as_ptr()`] method, for more information about of properly removing + /// or saving `data element` from / into the [`RawTable`] / [`RawTableInner`]. + /// + /// [`Undefined Behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + unsafe fn ctrl(&self, index: usize) -> *mut Tag { + debug_assert!(index < self.num_ctrl_bytes()); + // SAFETY: The caller must uphold the safety rules for the [`RawTableInner::ctrl`] + unsafe { self.ctrl.as_ptr().add(index).cast() } + } + + /// Gets the slice of all control bytes, as possibly uninitialized tags. + fn ctrl_slice(&mut self) -> &mut [mem::MaybeUninit] { + // SAFETY: We have the correct number of control bytes. + unsafe { slice::from_raw_parts_mut(self.ctrl.as_ptr().cast(), self.num_ctrl_bytes()) } + } + + #[inline] + fn num_buckets(&self) -> usize { + self.bucket_mask + 1 + } + + /// Checks whether the bucket at `index` is full. + /// + /// # Safety + /// + /// The caller must ensure `index` is less than the number of buckets. + #[inline] + unsafe fn is_bucket_full(&self, index: usize) -> bool { + debug_assert!(index < self.num_buckets()); + unsafe { (*self.ctrl(index)).is_full() } + } + + #[inline] + fn num_ctrl_bytes(&self) -> usize { + self.bucket_mask + 1 + Group::WIDTH + } + + #[inline] + fn is_empty_singleton(&self) -> bool { + self.bucket_mask == 0 + } + + /// Attempts to allocate a new hash table with at least enough capacity + /// for inserting the given number of elements without reallocating, + /// and return it inside `ScopeGuard` to protect against panic in the hash + /// function.
+ /// + /// # Note + /// + /// It is recommended (but not required): + /// + /// * That the new table's `capacity` be greater than or equal to `self.items`. + /// + /// * The `alloc` is the same [`Allocator`] as the `Allocator` used + /// to allocate this table. + /// + /// * The `table_layout` is the same [`TableLayout`] as the `TableLayout` used + /// to allocate this table. + /// + /// If `table_layout` does not match the `TableLayout` that was used to allocate + /// this table, then using `mem::swap` with the `self` and the new table returned + /// by this function results in [`undefined behavior`]. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + fn prepare_resize<'a, A>( + &self, + alloc: &'a A, + table_layout: TableLayout, + capacity: usize, + fallibility: Fallibility, + ) -> Result, TryReserveError> + where + A: Allocator, + { + debug_assert!(self.items <= capacity); + + // Allocate and initialize the new table. + let new_table = + RawTableInner::fallible_with_capacity(alloc, table_layout, capacity, fallibility)?; + + // The hash function may panic, in which case we simply free the new + // table without dropping any elements that may have been copied into + // it. + // + // This guard is also used to free the old table on success, see + // the comment at the bottom of this function. + Ok(guard(new_table, move |self_| { + if !self_.is_empty_singleton() { + // SAFETY: + // 1. We have checked that our table is allocated. + // 2. We know for sure that the `alloc` and `table_layout` matches the + // [`Allocator`] and [`TableLayout`] used to allocate this table. + unsafe { self_.free_buckets(alloc, table_layout) }; + } + })) + } + + /// Reserves or rehashes to make room for `additional` more elements. + /// + /// This uses dynamic dispatch to reduce the amount of + /// code generated, but it is eliminated by LLVM optimizations when inlined. 
+ /// + /// # Safety + /// + /// If any of the following conditions are violated, the result is + /// [`undefined behavior`]: + /// + /// * The `alloc` must be the same [`Allocator`] as the `Allocator` used + /// to allocate this table. + /// + /// * The `layout` must be the same [`TableLayout`] as the `TableLayout` + /// used to allocate this table. + /// + /// * The `drop` function (`fn(*mut u8)`) must be the actual drop function of + /// the elements stored in the table. + /// + /// * The [`RawTableInner`] must have properly initialized control bytes. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[expect(clippy::inline_always)] + #[inline(always)] + unsafe fn reserve_rehash_inner( + &mut self, + alloc: &A, + additional: usize, + hasher: &dyn Fn(&mut Self, usize) -> u64, + fallibility: Fallibility, + layout: TableLayout, + drop: Option, + ) -> Result<(), TryReserveError> + where + A: Allocator, + { + // Avoid `Option::ok_or_else` because it bloats LLVM IR. + let Some(new_items) = self.items.checked_add(additional) else { + return Err(fallibility.capacity_overflow()); + }; + let full_capacity = bucket_mask_to_capacity(self.bucket_mask); + if new_items <= full_capacity / 2 { + // Rehash in-place without re-allocating if we have plenty of spare + // capacity that is locked up due to DELETED entries. + + // SAFETY: + // 1. We know for sure that `[`RawTableInner`]` has already been allocated + // (since new_items <= full_capacity / 2); + // 2. The caller ensures that `drop` function is the actual drop function of + // the elements stored in the table. + // 3. The caller ensures that `layout` matches the [`TableLayout`] that was + // used to allocate this table. + // 4. The caller ensures that the control bytes of the `RawTableInner` + // are already initialized. 
+ unsafe { + self.rehash_in_place(hasher, layout.size, drop); + } + Ok(()) + } else { + // Otherwise, conservatively resize to at least the next size up + // to avoid churning deletes into frequent rehashes. + // + // SAFETY: + // 1. We know for sure that `capacity >= self.items`. + // 2. The caller ensures that `alloc` and `layout` matches the [`Allocator`] and + // [`TableLayout`] that were used to allocate this table. + // 3. The caller ensures that the control bytes of the `RawTableInner` + // are already initialized. + unsafe { + self.resize_inner( + alloc, + usize::max(new_items, full_capacity + 1), + hasher, + fallibility, + layout, + ) + } + } + } + + /// Returns an iterator over full buckets indices in the table. + /// + /// # Safety + /// + /// Behavior is undefined if any of the following conditions are violated: + /// + /// * The caller has to ensure that the `RawTableInner` outlives the + /// `FullBucketsIndices`. Because we cannot make the `next` method + /// unsafe on the `FullBucketsIndices` struct, we have to make the + /// `full_buckets_indices` method unsafe. + /// + /// * The [`RawTableInner`] must have properly initialized control bytes. + #[inline(always)] + unsafe fn full_buckets_indices(&self) -> FullBucketsIndices { + // SAFETY: + // 1. Since the caller of this function ensures that the control bytes + // are properly initialized and `self.ctrl(0)` points to the start + // of the array of control bytes, therefore: `ctrl` is valid for reads, + // properly aligned to `Group::WIDTH` and points to the properly initialized + // control bytes. + // 2. The value of `items` is equal to the amount of data (values) added + // to the table. + // + // `ctrl` points here (to the start + // of the first control byte `CT0`) + // ∨ + // [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, Group::WIDTH + // \________ ________/ + // \/ + // `n = buckets - 1`, i.e. 
`RawTableInner::num_buckets() - 1` + // + // where: T0...T_n - our stored data; + // CT0...CT_n - control bytes or metadata for `data`. + unsafe { + let ctrl = NonNull::new_unchecked(self.ctrl(0).cast::()); + + FullBucketsIndices { + // Load the first group + // SAFETY: See explanation above. + current_group: Group::load_aligned(ctrl.as_ptr().cast()) + .match_full() + .into_iter(), + group_first_index: 0, + ctrl, + items: self.items, + } + } + } + + /// Allocates a new table of a different size and moves the contents of the + /// current table into it. + /// + /// This uses dynamic dispatch to reduce the amount of + /// code generated, but it is eliminated by LLVM optimizations when inlined. + /// + /// # Safety + /// + /// If any of the following conditions are violated, the result is + /// [`undefined behavior`]: + /// + /// * The `alloc` must be the same [`Allocator`] as the `Allocator` used + /// to allocate this table; + /// + /// * The `layout` must be the same [`TableLayout`] as the `TableLayout` + /// used to allocate this table; + /// + /// * The [`RawTableInner`] must have properly initialized control bytes. + /// + /// The caller of this function must ensure that `capacity >= self.items` + /// otherwise: + /// + /// * If `self.items != 0`, calling of this function with `capacity == 0` + /// results in [`undefined behavior`]. + /// + /// * If `capacity_to_buckets(capacity) < Group::WIDTH` and + /// `self.items > capacity_to_buckets(capacity)` calling this function + /// results in [`undefined behavior`]. + /// + /// * If `capacity_to_buckets(capacity) >= Group::WIDTH` and + /// `self.items > capacity_to_buckets(capacity)` calling this function + /// will never return (will go into an infinite loop). + /// + /// Note: It is recommended (but not required) that the new table's `capacity` + /// be greater than or equal to `self.items`. In case if `capacity <= self.items` + /// this function can never return.
See [`RawTableInner::find_insert_index`] for + /// more information. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[expect(clippy::inline_always)] + #[inline(always)] + unsafe fn resize_inner( + &mut self, + alloc: &A, + capacity: usize, + hasher: &dyn Fn(&mut Self, usize) -> u64, + fallibility: Fallibility, + layout: TableLayout, + ) -> Result<(), TryReserveError> + where + A: Allocator, + { + // SAFETY: We know for sure that `alloc` and `layout` matches the [`Allocator`] and [`TableLayout`] + // that were used to allocate this table. + let mut new_table = self.prepare_resize(alloc, layout, capacity, fallibility)?; + + // SAFETY: We know for sure that RawTableInner will outlive the + // returned `FullBucketsIndices` iterator, and the caller of this + // function ensures that the control bytes are properly initialized. + unsafe { + for full_byte_index in self.full_buckets_indices() { + // This may panic. + let hash = hasher(self, full_byte_index); + + // SAFETY: + // We can use a simpler version of insert() here since: + // 1. There are no DELETED entries. + // 2. We know there is enough space in the table. + // 3. All elements are unique. + // 4. The caller of this function guarantees that `capacity > 0` + // so `new_table` must already have some allocated memory. + // 5. We set `growth_left` and `items` fields of the new table + // after the loop. + // 6. We insert into the table, at the returned index, the data + // matching the given hash immediately after calling this function. 
+ let (new_index, _) = new_table.prepare_insert_index(hash); + + // SAFETY: + // + // * `src` is valid for reads of `layout.size` bytes, since the + // table is alive and the `full_byte_index` is guaranteed to be + // within bounds (see `FullBucketsIndices::next_impl`); + // + // * `dst` is valid for writes of `layout.size` bytes, since the + // caller ensures that `table_layout` matches the [`TableLayout`] + // that was used to allocate old table and we have the `new_index` + // returned by `prepare_insert_index`. + // + // * Both `src` and `dst` are properly aligned. + // + // * Both `src` and `dst` point to different region of memory. + ptr::copy_nonoverlapping( + self.bucket_ptr(full_byte_index, layout.size), + new_table.bucket_ptr(new_index, layout.size), + layout.size, + ); + } + } + + // The hash function didn't panic, so we can safely set the + // `growth_left` and `items` fields of the new table. + new_table.growth_left -= self.items; + new_table.items = self.items; + + // We successfully copied all elements without panicking. Now replace + // self with the new table. The old table will have its memory freed but + // the items will not be dropped (since they have been moved into the + // new table). + // SAFETY: The caller ensures that `table_layout` matches the [`TableLayout`] + // that was used to allocate this table. + mem::swap(self, &mut new_table); + + Ok(()) + } + + /// Rehashes the contents of the table in place (i.e. without changing the + /// allocation). + /// + /// If `hasher` panics then some of the table's contents may be lost. + /// + /// This uses dynamic dispatch to reduce the amount of + /// code generated, but it is eliminated by LLVM optimizations when inlined.
+ /// + /// # Safety + /// + /// If any of the following conditions are violated, the result is [`undefined behavior`]: + /// + /// * The `size_of` must be equal to the size of the elements stored in the table; + /// + /// * The `drop` function (`fn(*mut u8)`) must be the actual drop function of + /// the elements stored in the table. + /// + /// * The [`RawTableInner`] has already been allocated; + /// + /// * The [`RawTableInner`] must have properly initialized control bytes. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[cfg_attr(feature = "inline-more", expect(clippy::inline_always))] + #[cfg_attr(feature = "inline-more", inline(always))] + #[cfg_attr(not(feature = "inline-more"), inline)] + unsafe fn rehash_in_place( + &mut self, + hasher: &dyn Fn(&mut Self, usize) -> u64, + size_of: usize, + drop: Option, + ) { + // If the hash function panics then properly clean up any elements + // that we haven't rehashed yet. We unfortunately can't preserve the + // element since we lost their hash and have no way of recovering it + // without risking another panic. + unsafe { + self.prepare_rehash_in_place(); + } + + let mut guard = guard(self, move |self_| { + for i in 0..self_.num_buckets() { + unsafe { + // Any elements that haven't been rehashed yet have a + // DELETED tag. These need to be dropped and have their tag + // reset to EMPTY. + if *self_.ctrl(i) == Tag::DELETED { + self_.set_ctrl(i, Tag::EMPTY); + if let Some(drop) = drop { + drop(self_.bucket_ptr(i, size_of)); + } + self_.items -= 1; + } + } + } + self_.growth_left = bucket_mask_to_capacity(self_.bucket_mask) - self_.items; + }); + + // At this point, DELETED elements are elements that we haven't + // rehashed yet. Find them and re-insert them at their ideal + // position. 
+ 'outer: for i in 0..guard.num_buckets() { + unsafe { + if *guard.ctrl(i) != Tag::DELETED { + continue; + } + } + + let i_p = unsafe { guard.bucket_ptr(i, size_of) }; + + loop { + // Hash the current item + let hash = hasher(*guard, i); + + // Search for a suitable place to put it + // + // SAFETY: Caller of this function ensures that the control bytes + // are properly initialized. + let new_i = unsafe { guard.find_insert_index(hash) }; + + // Probing works by scanning through all of the control + // bytes in groups, which may not be aligned to the group + // size. If both the new and old position fall within the + // same unaligned group, then there is no benefit in moving + // it and we can just continue to the next item. + if likely(guard.is_in_same_group(i, new_i, hash)) { + unsafe { guard.set_ctrl_hash(i, hash) }; + continue 'outer; + } + + let new_i_p = unsafe { guard.bucket_ptr(new_i, size_of) }; + + // We are moving the current item to a new position. Write + // our H2 to the control byte of the new position. + let prev_ctrl = unsafe { guard.replace_ctrl_hash(new_i, hash) }; + if prev_ctrl == Tag::EMPTY { + unsafe { guard.set_ctrl(i, Tag::EMPTY) }; + // If the target slot is empty, simply move the current + // element into the new slot and clear the old control + // byte. + unsafe { + ptr::copy_nonoverlapping(i_p, new_i_p, size_of); + } + continue 'outer; + } + + // If the target slot is occupied, swap the two elements + // and then continue processing the element that we just + // swapped into the old slot. + debug_assert_eq!(prev_ctrl, Tag::DELETED); + unsafe { + ptr::swap_nonoverlapping(i_p, new_i_p, size_of); + } + } + } + + guard.growth_left = bucket_mask_to_capacity(guard.bucket_mask) - guard.items; + + mem::forget(guard); + } + + /// Deallocates the table without dropping any entries. + /// + /// # Note + /// + /// This function must be called only after [`drop_elements`](RawTableInner::drop_elements), + /// else it can lead to leaking of memory. 
Also calling this function automatically + /// makes invalid (dangling) all instances of buckets ([`Bucket`]) and makes invalid + /// (dangling) the `ctrl` field of the table. + /// + /// # Safety + /// + /// If any of the following conditions are violated, the result is [`Undefined Behavior`]: + /// + /// * The [`RawTableInner`] has already been allocated; + /// + /// * The `alloc` must be the same [`Allocator`] as the `Allocator` that was used + /// to allocate this table. + /// + /// * The `table_layout` must be the same [`TableLayout`] as the `TableLayout` that was used + /// to allocate this table. + /// + /// See also [`GlobalAlloc::dealloc`] or [`Allocator::deallocate`] for more information. + /// + /// [`Undefined Behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + /// [`GlobalAlloc::dealloc`]: stdalloc::alloc::GlobalAlloc::dealloc + /// [`Allocator::deallocate`]: stdalloc::alloc::Allocator::deallocate + #[inline] + unsafe fn free_buckets(&mut self, alloc: &A, table_layout: TableLayout) + where + A: Allocator, + { + unsafe { + // SAFETY: The caller must uphold the safety contract for `free_buckets` + // method. + let (ptr, layout) = self.allocation_info(table_layout); + alloc.deallocate(ptr, layout); + } + } + + /// Returns a pointer to the allocated memory and the layout that was used to + /// allocate the table. + /// + /// # Safety + /// + /// Caller of this function must observe the following safety rules: + /// + /// * The [`RawTableInner`] has already been allocated, otherwise + /// calling this function results in [`undefined behavior`] + /// + /// * The `table_layout` must be the same [`TableLayout`] as the `TableLayout` + /// that was used to allocate this table. Failure to comply with this condition + /// may result in [`undefined behavior`]. + /// + /// See also [`GlobalAlloc::dealloc`] or [`Allocator::deallocate`] for more information. 
+ /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + /// [`GlobalAlloc::dealloc`]: stdalloc::alloc::GlobalAlloc::dealloc + /// [`Allocator::deallocate`]: stdalloc::alloc::Allocator::deallocate + #[inline] + unsafe fn allocation_info(&self, table_layout: TableLayout) -> (NonNull, Layout) { + debug_assert!( + !self.is_empty_singleton(), + "this function can only be called on non-empty tables" + ); + + let (layout, ctrl_offset) = { + let option = table_layout.calculate_layout_for(self.num_buckets()); + unsafe { option.unwrap_unchecked() } + }; + ( + // SAFETY: The caller must uphold the safety contract for the `allocation_info` method. + unsafe { NonNull::new_unchecked(self.ctrl.as_ptr().sub(ctrl_offset)) }, + layout, + ) + } + + /// Returns the total amount of memory allocated internally by the hash + /// table, in bytes. + /// + /// The returned number is informational only. It is intended to be + /// primarily used for memory profiling. + /// + /// # Safety + /// + /// The `table_layout` must be the same [`TableLayout`] as the `TableLayout` + /// that was used to allocate this table. Failure to comply with this condition + /// may result in [`undefined behavior`]. + /// + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + unsafe fn allocation_size_or_zero(&self, table_layout: TableLayout) -> usize { + if self.is_empty_singleton() { + 0 + } else { + // SAFETY: + // 1. We have checked that our table is allocated. + // 2. The caller ensures that `table_layout` matches the [`TableLayout`] + // that was used to allocate this table. + unsafe { self.allocation_info(table_layout).1.size() } + } + } + + /// Marks all table buckets as empty without dropping their contents.
+ #[inline] + fn clear_no_drop(&mut self) { + if !self.is_empty_singleton() { + self.ctrl_slice().fill_empty(); + } + self.items = 0; + self.growth_left = bucket_mask_to_capacity(self.bucket_mask); + } + + /// Erases the [`Bucket`]'s control byte at the given index so that it is no longer + /// treated as full, decreases the `items` of the table and, if it can be done, + /// increases `self.growth_left`. + /// + /// This function does not actually erase / drop the [`Bucket`] itself, i.e. it + /// does not make any changes to the `data` parts of the table. The caller of this + /// function must take care to properly drop the `data`, otherwise calling this + /// function may result in a memory leak. + /// + /// # Safety + /// + /// You must observe the following safety rules when calling this function: + /// + /// * The [`RawTableInner`] has already been allocated; + /// + /// * The control byte at the given position must be full; + /// + /// * The `index` must not be greater than the `RawTableInner.bucket_mask`, i.e. + /// `index <= RawTableInner.bucket_mask` or, in other words, `(index + 1)` must + /// be no greater than the number returned by the function [`RawTableInner::num_buckets`]. + /// + /// Calling this function on a table that has not been allocated results in [`undefined behavior`]. + /// + /// Calling this function on a table with no elements is unspecified, but calling subsequent + /// functions is likely to result in [`undefined behavior`] due to subtraction overflow + /// (`self.items -= 1` overflows when `self.items == 0`). + /// + /// See also [`Bucket::as_ptr`] method, for more information about properly removing + /// or saving `data element` from / into the [`RawTable`] / [`RawTableInner`].
+ /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + unsafe fn erase(&mut self, index: usize) { + unsafe { + debug_assert!(self.is_bucket_full(index)); + } + + // This is the same as `index.wrapping_sub(Group::WIDTH) % self.num_buckets()` because + // the number of buckets is a power of two, and `self.bucket_mask = self.num_buckets() - 1`. + let index_before = index.wrapping_sub(Group::WIDTH) & self.bucket_mask; + // SAFETY: + // - The caller must uphold the safety contract for `erase` method; + // - `index_before` is guaranteed to be in range due to masking with `self.bucket_mask` + let (empty_before, empty_after) = unsafe { + ( + Group::load(self.ctrl(index_before)).match_empty(), + Group::load(self.ctrl(index)).match_empty(), + ) + }; + + // Inserting and searching in the map is performed by two key functions: + // + // - The `find_insert_index` function that looks up the index of any `Tag::EMPTY` or `Tag::DELETED` + // slot in a group to be able to insert. If it doesn't find an `Tag::EMPTY` or `Tag::DELETED` + // slot immediately in the first group, it jumps to the next `Group` looking for it, + // and so on until it has gone through all the groups in the control bytes. + // + // - The `find_inner` function that looks for the index of the desired element by looking + // at all the `FULL` bytes in the group. If it did not find the element right away, and + // there is no `Tag::EMPTY` byte in the group, then this means that the `find_insert_index` + // function may have found a suitable slot in the next group. Therefore, `find_inner` + // jumps further, and if it does not find the desired element and again there is no `Tag::EMPTY` + // byte, then it jumps further, and so on. The search stops only if `find_inner` function + // finds the desired element or hits an `Tag::EMPTY` slot/byte. 
+ // + // Accordingly, this leads to two consequences: + // + // - The map must have `Tag::EMPTY` slots (bytes); + // + // - You can't just mark the byte to be erased as `Tag::EMPTY`, because otherwise the `find_inner` + // function may stumble upon an `Tag::EMPTY` byte before finding the desired element and stop + // searching. + // + // Thus it is necessary to check all bytes after and before the erased element. If we are in + // a contiguous `Group` of `FULL` or `Tag::DELETED` bytes (the number of `FULL` or `Tag::DELETED` bytes + // before and after is greater than or equal to `Group::WIDTH`), then we must mark our byte as + // `Tag::DELETED` in order for the `find_inner` function to go further. On the other hand, if there + // is at least one `Tag::EMPTY` slot in the `Group`, then the `find_inner` function will still stumble + // upon an `Tag::EMPTY` byte, so we can safely mark our erased byte as `Tag::EMPTY` as well. + // + // Finally, since `index_before == (index.wrapping_sub(Group::WIDTH) & self.bucket_mask) == index` + // and given all of the above, tables smaller than the group width (self.num_buckets() < Group::WIDTH) + // cannot have `Tag::DELETED` bytes. + // + // Note that in this context `leading_zeros` refers to the bytes at the end of a group, while + // `trailing_zeros` refers to the bytes at the beginning of a group. + let ctrl = if empty_before.leading_zeros() + empty_after.trailing_zeros() >= Group::WIDTH { + Tag::DELETED + } else { + self.growth_left += 1; + Tag::EMPTY + }; + // SAFETY: the caller must uphold the safety contract for `erase` method. 
+ unsafe { + self.set_ctrl(index, ctrl); + } + self.items -= 1; + } +} + +impl Clone for RawTable { + fn clone(&self) -> Self { + if self.table.is_empty_singleton() { + Self::new_in(self.alloc.clone()) + } else { + // SAFETY: This is safe as we are taking the size of an already allocated table + // and therefore capacity overflow cannot occur, `self.table.num_buckets()` is a power + // of two and all allocator errors will be caught inside `RawTableInner::new_uninitialized`. + let result = unsafe { + Self::new_uninitialized( + self.alloc.clone(), + self.table.num_buckets(), + Fallibility::Infallible, + ) + }; + + // SAFETY: The result of calling the `new_uninitialized` function cannot be an error + // because `fallibility == Fallibility::Infallible`. + let mut new_table = unsafe { result.unwrap_unchecked() }; + + // SAFETY: + // Cloning elements may fail (the clone function may panic). But we don't + // need to worry about uninitialized control bits, since: + // 1. The number of items (elements) in the table is zero, which means that + // the control bits will not be read by the `Drop` function. + // 2. The `clone_from_spec` method will first copy all control bits from + // `self` (thus initializing them). But this will not affect the `Drop` + // function, since the `clone_from_spec` function sets `items` only after + // successfully cloning all elements. + unsafe { new_table.clone_from_spec(self) }; + new_table + } + } + + fn clone_from(&mut self, source: &Self) { + if source.table.is_empty_singleton() { + let mut old_inner = mem::replace(&mut self.table, RawTableInner::NEW); + unsafe { + // SAFETY: + // 1. We call the function only once; + // 2. We know for sure that `alloc` and `table_layout` match the [`Allocator`] + // and [`TableLayout`] that were used to allocate this table. + // 3. If any element's drop function panics, then there will only be a memory leak, + // because we have replaced the inner table with a new one.
+ old_inner.drop_inner_table::(&self.alloc, Self::TABLE_LAYOUT); + } + } else { + unsafe { + // Make sure that if any panic occurs, we clear the table and + // leave it in an empty state. + let mut self_ = guard(self, |self_| { + self_.clear_no_drop(); + }); + + // First, drop all our elements without clearing the control + // bytes. If this panics then the scope guard will clear the + // table, leaking any elements that were not dropped yet. + // + // This leak is unavoidable: we can't try dropping more elements + // since this could lead to another panic and abort the process. + // + // SAFETY: If something goes wrong we clear our table right after + // dropping the elements, so there is no double drop, since `items` + // will be equal to zero. + self_.table.drop_elements::(); + + // If necessary, resize our table to match the source. + if self_.num_buckets() != source.num_buckets() { + let new_inner = { + let result = RawTableInner::new_uninitialized( + &self_.alloc, + Self::TABLE_LAYOUT, + source.num_buckets(), + Fallibility::Infallible, + ); + result.unwrap_unchecked() + }; + // Replace the old inner with a new uninitialized one. It's ok, since if something goes + // wrong `ScopeGuard` will initialize all control bytes and leave an empty table. + let mut old_inner = mem::replace(&mut self_.table, new_inner); + if !old_inner.is_empty_singleton() { + // SAFETY: + // 1. We have checked that our table is allocated. + // 2. We know for sure that `alloc` and `table_layout` match + // the [`Allocator`] and [`TableLayout`] that were used to allocate this table. + old_inner.free_buckets(&self_.alloc, Self::TABLE_LAYOUT); + } + } + + // Cloning elements may fail (the clone function may panic), but the `ScopeGuard` + // inside the `clone_from_impl` function will take care of that, dropping all + // cloned elements if necessary. Our `ScopeGuard` will clear the table. + self_.clone_from_spec(source); + + // Disarm the scope guard if cloning was successful.
+ ScopeGuard::into_inner(self_); + } + } + } +} + +/// Specialization of `clone_from` for `Copy` types +trait RawTableClone { + unsafe fn clone_from_spec(&mut self, source: &Self); +} +impl RawTableClone for RawTable { + #[cfg_attr(feature = "inline-more", inline)] + unsafe fn clone_from_spec(&mut self, source: &Self) { + unsafe { + self.clone_from_impl(source); + } + } +} + +impl RawTable { + /// Common code for `clone` and `clone_from`. Assumes: + /// - `self.num_buckets() == source.num_buckets()`. + /// - Any existing elements have been dropped. + /// - The control bytes are not initialized yet. + #[cfg_attr(feature = "inline-more", inline)] + unsafe fn clone_from_impl(&mut self, source: &Self) { + // Copy the control bytes unchanged. We do this in a single pass + unsafe { + source + .table + .ctrl(0) + .copy_to_nonoverlapping(self.table.ctrl(0), self.table.num_ctrl_bytes()); + } + + // The cloning of elements may panic, in which case we need + // to make sure we drop only the elements that have been + // cloned so far. + let mut guard = guard((0, &mut *self), |(index, self_)| { + if T::NEEDS_DROP { + for i in 0..*index { + unsafe { + if self_.is_bucket_full(i) { + self_.bucket(i).drop(); + } + } + } + } + }); + + unsafe { + for from in source.iter() { + let index = source.bucket_index(&from); + let to = guard.1.bucket(index); + to.write(from.as_ref().clone()); + + // Update the index in case we need to unwind. + guard.0 = index + 1; + } + } + + // Successfully cloned all items, no need to clean up. + mem::forget(guard); + + self.table.items = source.table.items; + self.table.growth_left = source.table.growth_left; + } +} + +impl Default for RawTable { + #[inline] + fn default() -> Self { + Self::new_in(Default::default()) + } +} + +unsafe impl<#[may_dangle] T, A: Allocator> Drop for RawTable { + #[cfg_attr(feature = "inline-more", inline)] + fn drop(&mut self) { + // SAFETY: + // 1. We call the function only once; + // 2. 
We know for sure that `alloc` and `table_layout` matches the [`Allocator`] + // and [`TableLayout`] that were used to allocate this table. + // 3. If the drop function of any elements fails, then only a memory leak will occur, + // and we don't care because we are inside the `Drop` function of the `RawTable`, + // so there won't be any table left in an inconsistent state. + unsafe { + self.table + .drop_inner_table::(&self.alloc, Self::TABLE_LAYOUT); + } + } +} + +impl IntoIterator for RawTable { + type Item = T; + type IntoIter = RawIntoIter; + + #[cfg_attr(feature = "inline-more", inline)] + fn into_iter(self) -> RawIntoIter { + unsafe { + let iter = self.iter(); + self.into_iter_from(iter) + } + } +} + +/// Iterator over a sub-range of a table. Unlike `RawIter` this iterator does +/// not track an item count. +pub struct RawIterRange { + // Mask of full buckets in the current group. Bits are cleared from this + // mask as each element is processed. + current_group: BitMaskIter, + + // Pointer to the buckets for the current group. + data: Bucket, + + // Pointer to the next group of control bytes, + // Must be aligned to the group size. + next_ctrl: *const u8, + + // Pointer one past the last control byte of this range. + end: *const u8, +} + +impl RawIterRange { + /// Returns a `RawIterRange` covering a subset of a table. + /// + /// # Safety + /// + /// If any of the following conditions are violated, the result is + /// [`undefined behavior`]: + /// + /// * `ctrl` must be valid for reads, i.e. 
table outlives the `RawIterRange`; + /// + /// * `ctrl` must be properly aligned to the group size (`Group::WIDTH`); + /// + /// * `ctrl` must point to the array of properly initialized control bytes; + /// + /// * `data` must be the [`Bucket`] at the `ctrl` index in the table; + /// + /// * the value of `len` must be less than or equal to the number of table buckets, + /// and the returned value of `ctrl.as_ptr().add(len).offset_from(ctrl.as_ptr())` + /// must be positive. + /// + /// * The `ctrl.add(len)` pointer must be either in bounds or one + /// byte past the end of the same [allocated table]. + /// + /// * The `len` must be a power of two. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[cfg_attr(feature = "inline-more", inline)] + unsafe fn new(ctrl: *const u8, data: Bucket, len: usize) -> Self { + debug_assert_ne!(len, 0); + debug_assert_eq!(ctrl as usize % Group::WIDTH, 0); + // SAFETY: The caller must uphold the safety rules for the [`RawIterRange::new`] + let end = unsafe { ctrl.add(len) }; + + // Load the first group and advance ctrl to point to the next group + // SAFETY: The caller must uphold the safety rules for the [`RawIterRange::new`] + let (current_group, next_ctrl) = unsafe { + ( + Group::load_aligned(ctrl.cast()).match_full(), + ctrl.add(Group::WIDTH), + ) + }; + + Self { + current_group: current_group.into_iter(), + data, + next_ctrl, + end, + } + } + + /// # Safety + /// If `DO_CHECK_PTR_RANGE` is false, caller must ensure that we never try to iterate + /// after yielding all elements. 
+ #[cfg_attr(feature = "inline-more", inline)] + unsafe fn next_impl(&mut self) -> Option> { + loop { + if let Some(index) = self.current_group.next() { + return Some(unsafe { self.data.next_n(index) }); + } + + if DO_CHECK_PTR_RANGE && self.next_ctrl >= self.end { + return None; + } + + // We might read past self.end up to the next group boundary, + // but this is fine because it only occurs on tables smaller + // than the group size where the trailing control bytes are all + // EMPTY. On larger tables self.end is guaranteed to be aligned + // to the group size (since tables are power-of-two sized). + unsafe { + self.current_group = Group::load_aligned(self.next_ctrl.cast()) + .match_full() + .into_iter(); + self.data = self.data.next_n(Group::WIDTH); + self.next_ctrl = self.next_ctrl.add(Group::WIDTH); + } + } + } + + /// Folds every element into an accumulator by applying an operation, + /// returning the final result. + /// + /// `fold_impl()` takes three arguments: the number of items remaining in + /// the iterator, an initial value, and a closure with two arguments: an + /// 'accumulator', and an element. The closure returns the value that the + /// accumulator should have for the next iteration. + /// + /// The initial value is the value the accumulator will have on the first call. + /// + /// After applying this closure to every element of the iterator, `fold_impl()` + /// returns the accumulator. + /// + /// # Safety + /// + /// If any of the following conditions are violated, the result is + /// [`Undefined Behavior`]: + /// + /// * The [`RawTableInner`] / [`RawTable`] must be alive and not moved, + /// i.e. table outlives the `RawIterRange`; + /// + /// * The provided `n` value must match the actual number of items + /// in the table. 
+ /// + /// [`Undefined Behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[expect(clippy::while_let_on_iterator)] + #[cfg_attr(feature = "inline-more", inline)] + unsafe fn fold_impl(mut self, mut n: usize, mut acc: B, mut f: F) -> B + where + F: FnMut(B, Bucket) -> B, + { + loop { + while let Some(index) = self.current_group.next() { + // The returned `index` will always be in the range `0..Group::WIDTH`, + // so that calling `self.data.next_n(index)` is safe (see detailed explanation below). + debug_assert!(n != 0); + let bucket = unsafe { self.data.next_n(index) }; + acc = f(acc, bucket); + n -= 1; + } + + if n == 0 { + return acc; + } + + // SAFETY: The caller of this function ensures that: + // + // 1. The provided `n` value matches the actual number of items in the table; + // 2. The table is alive and has not moved. + // + // Taking the above into account, we always stay within the bounds, because: + // + // 1. For tables not larger than the group width (self.num_buckets() <= Group::WIDTH), + // we will never end up in the given branch, since we should have already + // yielded all the elements of the table. + // + // 2. For tables larger than the group width. The number of buckets is a + // power of two (2 ^ n), Group::WIDTH is also a power of two (2 ^ k). Since + // `(2 ^ n) > (2 ^ k)`, then `(2 ^ n) % (2 ^ k) = 0`. As we start from the + // start of the array of control bytes, and never try to iterate after + // getting all the elements, the last `self.current_group` will read bytes + // from the `self.num_buckets() - Group::WIDTH` index. We know also that + // `self.current_group.next()` will always return indices within the range + // `0..Group::WIDTH`.
+ // + // Knowing all of the above and taking into account that we are synchronizing + // the `self.data` index with the index we used to read the `self.current_group`, + // the subsequent `self.data.next_n(index)` will always return a bucket with + // an index number less than `self.num_buckets()`. + // + // The last `self.next_ctrl`, whose index would be `self.num_buckets()`, will never + // actually be read, since we should have already yielded all the elements of + // the table. + unsafe { + self.current_group = Group::load_aligned(self.next_ctrl.cast()) + .match_full() + .into_iter(); + self.data = self.data.next_n(Group::WIDTH); + self.next_ctrl = self.next_ctrl.add(Group::WIDTH); + } + } + } +} + +// We make raw iterators unconditionally Send and Sync, and let the PhantomData +// in the actual iterator implementations determine the real Send/Sync bounds. +unsafe impl Send for RawIterRange {} +unsafe impl Sync for RawIterRange {} + +impl Clone for RawIterRange { + #[cfg_attr(feature = "inline-more", inline)] + fn clone(&self) -> Self { + Self { + data: self.data.clone(), + next_ctrl: self.next_ctrl, + current_group: self.current_group.clone(), + end: self.end, + } + } +} + +impl Iterator for RawIterRange { + type Item = Bucket; + + #[cfg_attr(feature = "inline-more", inline)] + fn next(&mut self) -> Option> { + unsafe { + // SAFETY: We set checker flag to true. + self.next_impl::() + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + // We don't have an item count, so just guess based on the range size. + let remaining_buckets = if self.end > self.next_ctrl { + unsafe { offset_from(self.end, self.next_ctrl) } + } else { + 0 + }; + + // Add a group width to include the group we are currently processing. + (0, Some(Group::WIDTH + remaining_buckets)) + } +} + +impl FusedIterator for RawIterRange {} + +/// Iterator which returns a raw pointer to every full bucket in the table. 
+/// +/// For maximum flexibility this iterator is not bound by a lifetime, but you +/// must observe several rules when using it: +/// - You must not free the hash table while iterating (including via growing/shrinking). +/// - It is fine to erase a bucket that has been yielded by the iterator. +/// - Erasing a bucket that has not yet been yielded by the iterator may still +/// result in the iterator yielding that bucket (unless `reflect_remove` is called). +/// - It is unspecified whether an element inserted after the iterator was +/// created will be yielded by that iterator (unless `reflect_insert` is called). +/// - The order in which the iterator yields bucket is unspecified and may +/// change in the future. +pub struct RawIter { + pub iter: RawIterRange, + items: usize, +} + +impl RawIter { + unsafe fn drop_elements(&mut self) { + unsafe { + if T::NEEDS_DROP && self.items != 0 { + for item in self { + item.drop(); + } + } + } + } +} + +impl Clone for RawIter { + #[cfg_attr(feature = "inline-more", inline)] + fn clone(&self) -> Self { + Self { + iter: self.iter.clone(), + items: self.items, + } + } +} +impl Default for RawIter { + #[cfg_attr(feature = "inline-more", inline)] + fn default() -> Self { + // SAFETY: Because the table is static, it always outlives the iter. + unsafe { RawTableInner::NEW.iter() } + } +} + +impl Iterator for RawIter { + type Item = Bucket; + + #[cfg_attr(feature = "inline-more", inline)] + fn next(&mut self) -> Option> { + // Inner iterator iterates over buckets + // so it can do unnecessary work if we already yielded all items. + if self.items == 0 { + return None; + } + + let nxt = unsafe { + // SAFETY: We check number of items to yield using `items` field. 
+ self.iter.next_impl::() + }; + + debug_assert!(nxt.is_some()); + self.items -= 1; + + nxt + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + (self.items, Some(self.items)) + } + + #[inline] + fn fold(self, init: B, f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + unsafe { self.iter.fold_impl(self.items, init, f) } + } +} + +impl ExactSizeIterator for RawIter {} +impl FusedIterator for RawIter {} + +/// Iterator which returns an index of every full bucket in the table. +/// +/// For maximum flexibility this iterator is not bound by a lifetime, but you +/// must observe several rules when using it: +/// - You must not free the hash table while iterating (including via growing/shrinking). +/// - It is fine to erase a bucket that has been yielded by the iterator. +/// - Erasing a bucket that has not yet been yielded by the iterator may still +/// result in the iterator yielding index of that bucket. +/// - It is unspecified whether an element inserted after the iterator was +/// created will be yielded by that iterator. +/// - The order in which the iterator yields indices of the buckets is unspecified +/// and may change in the future. +#[derive(Clone)] +pub struct FullBucketsIndices { + // Mask of full buckets in the current group. Bits are cleared from this + // mask as each element is processed. + current_group: BitMaskIter, + + // Initial value of the bytes' indices of the current group (relative + // to the start of the control bytes). + group_first_index: usize, + + // Pointer to the current group of control bytes, + // Must be aligned to the group size (Group::WIDTH). + ctrl: NonNull, + + // Number of elements in the table. + items: usize, +} + +impl Default for FullBucketsIndices { + #[cfg_attr(feature = "inline-more", inline)] + fn default() -> Self { + // SAFETY: Because the table is static, it always outlives the iter. 
+ unsafe { RawTableInner::NEW.full_buckets_indices() } + } +} + +impl FullBucketsIndices { + /// Advances the iterator and returns the next value. + /// + /// # Safety + /// + /// If any of the following conditions are violated, the result is + /// [`Undefined Behavior`]: + /// + /// * The [`RawTableInner`] / [`RawTable`] must be alive and not moved, + /// i.e. the table outlives the `FullBucketsIndices`; + /// + /// * The caller never tries to iterate after getting all elements. + /// + /// [`Undefined Behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline(always)] + unsafe fn next_impl(&mut self) -> Option { + loop { + if let Some(index) = self.current_group.next() { + // The returned `self.group_first_index + index` will always + // be in the range `0..self.num_buckets()`. See explanation below. + return Some(self.group_first_index + index); + } + + // SAFETY: The caller of this function ensures that: + // + // 1. It never tries to iterate after getting all the elements; + // 2. The table is alive and has not moved; + // 3. The first `self.ctrl` pointed to the start of the array of control bytes. + // + // Taking the above into account, we always stay within the bounds, because: + // + // 1. For tables not larger than the group width (self.num_buckets() <= Group::WIDTH), + // we will never end up in the given branch, since we should have already + // yielded all the elements of the table. + // + // 2. For tables larger than the group width. The number of buckets is a + // power of two (2 ^ n), Group::WIDTH is also a power of two (2 ^ k). Since + // `(2 ^ n) > (2 ^ k)`, then `(2 ^ n) % (2 ^ k) = 0`.
As we start from the + // start of the array of control bytes, and never try to iterate after + // getting all the elements, the last `self.ctrl` will be equal to + // the `self.num_buckets() - Group::WIDTH`, so `self.current_group.next()` + // will always return indices within the range `0..Group::WIDTH`, + // and subsequent `self.group_first_index + index` will always return a + // number less than `self.num_buckets()`. + unsafe { + self.ctrl = NonNull::new_unchecked(self.ctrl.as_ptr().add(Group::WIDTH)); + } + + // SAFETY: See explanation above. + unsafe { + self.current_group = Group::load_aligned(self.ctrl.as_ptr().cast()) + .match_full() + .into_iter(); + self.group_first_index += Group::WIDTH; + } + } + } +} + +impl Iterator for FullBucketsIndices { + type Item = usize; + + /// Advances the iterator and returns the next value. It is up to + /// the caller to ensure that the `RawTable` outlives the `FullBucketsIndices`, + /// because we cannot make the `next` method unsafe. + #[inline(always)] + fn next(&mut self) -> Option { + // Return if we already yielded all items. + if self.items == 0 { + return None; + } + + // SAFETY: + // 1. We check the number of items to yield using the `items` field. + // 2. The caller ensures that the table is alive and has not moved. + let nxt = unsafe { self.next_impl() }; + + debug_assert!(nxt.is_some()); + self.items -= 1; + + nxt + } + + #[inline(always)] + fn size_hint(&self) -> (usize, Option) { + (self.items, Some(self.items)) + } +} + +impl ExactSizeIterator for FullBucketsIndices {} +impl FusedIterator for FullBucketsIndices {} + +/// Iterator which consumes a table and returns elements.
+pub struct RawIntoIter { + iter: RawIter, + allocation: Option<(NonNull, Layout, A)>, + marker: PhantomData, +} + +impl RawIntoIter { + #[cfg_attr(feature = "inline-more", inline)] + pub fn iter(&self) -> RawIter { + self.iter.clone() + } +} + +unsafe impl Send for RawIntoIter +where + T: Send, + A: Send, +{ +} +unsafe impl Sync for RawIntoIter +where + T: Sync, + A: Sync, +{ +} + +impl Drop for RawIntoIter { + #[cfg_attr(feature = "inline-more", inline)] + fn drop(&mut self) { + unsafe { + // Drop all remaining elements + self.iter.drop_elements(); + + // Free the table + if let Some((ptr, layout, ref alloc)) = self.allocation { + alloc.deallocate(ptr, layout); + } + } + } +} + +impl Default for RawIntoIter { + fn default() -> Self { + Self { + iter: Default::default(), + allocation: None, + marker: PhantomData, + } + } +} +impl Iterator for RawIntoIter { + type Item = T; + + #[cfg_attr(feature = "inline-more", inline)] + fn next(&mut self) -> Option { + unsafe { Some(self.iter.next()?.read()) } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } +} + +impl ExactSizeIterator for RawIntoIter {} +impl FusedIterator for RawIntoIter {} + +/// Iterator which consumes elements without freeing the table storage. +pub struct RawDrain<'a, T, A: Allocator = Global> { + iter: RawIter, + + // The table is moved into the iterator for the duration of the drain. This + // ensures that an empty table is left if the drain iterator is leaked + // without dropping. + table: RawTableInner, + orig_table: NonNull, + + // We don't use a &'a mut RawTable because we want RawDrain to be + // covariant over T. 
+ marker: PhantomData<&'a RawTable>, +} + +impl RawDrain<'_, T, A> { + #[cfg_attr(feature = "inline-more", inline)] + pub fn iter(&self) -> RawIter { + self.iter.clone() + } +} + +unsafe impl Send for RawDrain<'_, T, A> +where + T: Send, + A: Send, +{ +} +unsafe impl Sync for RawDrain<'_, T, A> +where + T: Sync, + A: Sync, +{ +} + +impl Drop for RawDrain<'_, T, A> { + #[cfg_attr(feature = "inline-more", inline)] + fn drop(&mut self) { + unsafe { + // Drop all remaining elements. Note that this may panic. + self.iter.drop_elements(); + + // Reset the contents of the table now that all elements have been + // dropped. + self.table.clear_no_drop(); + + // Move the now empty table back to its original location. + self.orig_table + .as_ptr() + .copy_from_nonoverlapping(&raw const self.table, 1); + } + } +} + +impl Iterator for RawDrain<'_, T, A> { + type Item = T; + + #[cfg_attr(feature = "inline-more", inline)] + fn next(&mut self) -> Option { + unsafe { + let item = self.iter.next()?; + Some(item.read()) + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } +} + +impl ExactSizeIterator for RawDrain<'_, T, A> {} +impl FusedIterator for RawDrain<'_, T, A> {} + +/// Iterator over occupied buckets that could match a given hash. +/// +/// `RawTable` only stores 7 bits of the hash value, so this iterator may return +/// items that have a hash value different than the one provided. You should +/// always validate the returned values before using them. +/// +/// For maximum flexibility this iterator is not bound by a lifetime, but you +/// must observe several rules when using it: +/// - You must not free the hash table while iterating (including via growing/shrinking). +/// - It is fine to erase a bucket that has been yielded by the iterator. +/// - Erasing a bucket that has not yet been yielded by the iterator may still +/// result in the iterator yielding that bucket. 
+/// - It is unspecified whether an element inserted after the iterator was +/// created will be yielded by that iterator. +/// - The order in which the iterator yields buckets is unspecified and may +/// change in the future. +pub struct RawIterHash { + inner: RawIterHashIndices, + _marker: PhantomData, +} + +#[derive(Clone)] +pub struct RawIterHashIndices { + // See `RawTableInner`'s corresponding fields for details. + // We can't store a `*const RawTableInner` as it would get + // invalidated by the user calling `&mut` methods on `RawTable`. + bucket_mask: usize, + ctrl: NonNull, + + // The top 7 bits of the hash. + tag_hash: Tag, + + // The sequence of groups to probe in the search. + probe_seq: ProbeSeq, + + group: Group, + + // The elements within the group with a matching tag-hash. + bitmask: BitMaskIter, +} + +impl RawIterHash { + #[cfg_attr(feature = "inline-more", inline)] + unsafe fn new(table: &RawTable, hash: u64) -> Self { + RawIterHash { + inner: unsafe { RawIterHashIndices::new(&table.table, hash) }, + _marker: PhantomData, + } + } +} + +impl Clone for RawIterHash { + #[cfg_attr(feature = "inline-more", inline)] + fn clone(&self) -> Self { + Self { + inner: self.inner.clone(), + _marker: PhantomData, + } + } +} + +impl Default for RawIterHash { + #[cfg_attr(feature = "inline-more", inline)] + fn default() -> Self { + Self { + inner: RawIterHashIndices::default(), + _marker: PhantomData, + } + } +} + +impl Default for RawIterHashIndices { + #[cfg_attr(feature = "inline-more", inline)] + fn default() -> Self { + // SAFETY: Because the table is static, it always outlives the iter. 
+ unsafe { RawIterHashIndices::new(&RawTableInner::NEW, 0) } + } +} + +impl RawIterHashIndices { + #[cfg_attr(feature = "inline-more", inline)] + unsafe fn new(table: &RawTableInner, hash: u64) -> Self { + let tag_hash = Tag::full(hash); + let probe_seq = table.probe_seq(hash); + let group = unsafe { Group::load(table.ctrl(probe_seq.pos)) }; + let bitmask = group.match_tag(tag_hash).into_iter(); + + RawIterHashIndices { + bucket_mask: table.bucket_mask, + ctrl: table.ctrl, + tag_hash, + probe_seq, + group, + bitmask, + } + } +} + +impl Iterator for RawIterHash { + type Item = Bucket; + + fn next(&mut self) -> Option> { + unsafe { + match self.inner.next() { + Some(index) => { + // Can't use `RawTable::bucket` here as we don't have + // an actual `RawTable` reference to use. + debug_assert!(index <= self.inner.bucket_mask); + let bucket = Bucket::from_base_index(self.inner.ctrl.cast(), index); + Some(bucket) + } + None => None, + } + } + } +} + +impl Iterator for RawIterHashIndices { + type Item = usize; + + fn next(&mut self) -> Option { + unsafe { + loop { + if let Some(bit) = self.bitmask.next() { + let index = (self.probe_seq.pos + bit) & self.bucket_mask; + return Some(index); + } + if likely(self.group.match_empty().any_bit_set()) { + return None; + } + self.probe_seq.move_next(self.bucket_mask); + + // Can't use `RawTableInner::ctrl` here as we don't have + // an actual `RawTableInner` reference to use. 
+ let index = self.probe_seq.pos; + debug_assert!(index < self.bucket_mask + 1 + Group::WIDTH); + let group_ctrl = self.ctrl.as_ptr().add(index).cast(); + + self.group = Group::load(group_ctrl); + self.bitmask = self.group.match_tag(self.tag_hash).into_iter(); + } + } + } +} + +pub struct RawExtractIf<'a, T, A: Allocator> { + pub iter: RawIter, + pub table: &'a mut RawTable, +} + +impl RawExtractIf<'_, T, A> { + #[cfg_attr(feature = "inline-more", inline)] + pub fn next(&mut self, mut f: F) -> Option + where + F: FnMut(&mut T) -> bool, + { + unsafe { + for item in &mut self.iter { + if f(item.as_mut()) { + return Some(self.table.remove(item).0); + } + } + } + None + } +} + +#[cfg(test)] +mod test_map { + use super::*; + + #[test] + fn test_prev_pow2() { + // Skip 0, not defined for that input. + let mut pow2: usize = 1; + while (pow2 << 1) > 0 { + let next_pow2 = pow2 << 1; + assert_eq!(pow2, prev_pow2(pow2)); + // Need to skip 2, because it's also a power of 2, so it doesn't + // return the previous power of 2. + if next_pow2 > 2 { + assert_eq!(pow2, prev_pow2(pow2 + 1)); + assert_eq!(pow2, prev_pow2(next_pow2 - 1)); + } + pow2 = next_pow2; + } + } + + #[test] + fn test_minimum_capacity_for_small_types() { + #[track_caller] + fn test_t() { + let raw_table: RawTable = RawTable::with_capacity(1); + let actual_buckets = raw_table.num_buckets(); + let min_buckets = Group::WIDTH / core::mem::size_of::(); + assert!( + actual_buckets >= min_buckets, + "expected at least {min_buckets} buckets, got {actual_buckets} buckets" + ); + } + + test_t::(); + + // This is only "small" for some platforms, like x86_64 with SSE2, but + // there's no harm in running it on other platforms. 
+ test_t::(); + } + + fn rehash_in_place(table: &mut RawTable, hasher: impl Fn(&T) -> u64) { + unsafe { + table.table.rehash_in_place( + &|table, index| hasher(table.bucket::(index).as_ref()), + mem::size_of::(), + if mem::needs_drop::() { + Some(|ptr| ptr::drop_in_place(ptr.cast::())) + } else { + None + }, + ); + } + } + + #[test] + fn rehash() { + let mut table = RawTable::new(); + let hasher = |i: &u64| *i; + for i in 0..100 { + table.insert(i, i, hasher); + } + + for i in 0..100 { + unsafe { + assert_eq!( + table + .find(i, |x| Ok::<_, ()>(*x == i)) + .unwrap() + .map(|b| b.read()), + Some(i) + ); + } + assert!(table + .find(i + 100, |x| Ok::<_, ()>(*x == i + 100)) + .unwrap() + .is_none()); + } + + rehash_in_place(&mut table, hasher); + + for i in 0..100 { + unsafe { + assert_eq!( + table + .find(i, |x| Ok::<_, ()>(*x == i)) + .unwrap() + .map(|b| b.read()), + Some(i) + ); + } + assert!(table + .find(i + 100, |x| Ok::<_, ()>(*x == i + 100)) + .unwrap() + .is_none()); + } + } + + /// CHECKING THAT WE ARE NOT TRYING TO READ THE MEMORY OF + /// AN UNINITIALIZED TABLE DURING THE DROP + #[test] + fn test_drop_uninitialized() { + use std::vec::Vec; + + let table = unsafe { + // SAFETY: The `buckets` is power of two and we're not + // trying to actually use the returned RawTable. + RawTable::<(u64, Vec)>::new_uninitialized(Global, 8, Fallibility::Infallible) + .unwrap() + }; + drop(table); + } + + /// CHECKING THAT WE DON'T TRY TO DROP DATA IF THE `ITEMS` + /// ARE ZERO, EVEN IF WE HAVE `FULL` CONTROL BYTES. + #[test] + fn test_drop_zero_items() { + use std::vec::Vec; + unsafe { + // SAFETY: The `buckets` is power of two and we're not + // trying to actually use the returned RawTable. + let mut table = + RawTable::<(u64, Vec)>::new_uninitialized(Global, 8, Fallibility::Infallible) + .unwrap(); + + // WE SIMULATE, AS IT WERE, A FULL TABLE. 
+ + // SAFETY: We checked that the table is allocated and therefore the table already has + // `self.bucket_mask + 1 + Group::WIDTH` number of control bytes (see TableLayout::calculate_layout_for) + // so writing `table.table.num_ctrl_bytes() == bucket_mask + 1 + Group::WIDTH` bytes is safe. + table.table.ctrl_slice().fill_empty(); + + // SAFETY: table.capacity() is guaranteed to be smaller than table.num_buckets() + table.table.ctrl(0).write_bytes(0, table.capacity()); + + // Fix up the trailing control bytes. See the comments in set_ctrl + // for the handling of tables smaller than the group width. + if table.num_buckets() < Group::WIDTH { + // SAFETY: We have `self.bucket_mask + 1 + Group::WIDTH` number of control bytes, + // so copying `self.num_buckets() == self.bucket_mask + 1` bytes with offset equal to + // `Group::WIDTH` is safe + table + .table + .ctrl(0) + .copy_to(table.table.ctrl(Group::WIDTH), table.table.num_buckets()); + } else { + // SAFETY: We have `self.bucket_mask + 1 + Group::WIDTH` number of + // control bytes,so copying `Group::WIDTH` bytes with offset equal + // to `self.num_buckets() == self.bucket_mask + 1` is safe + table + .table + .ctrl(0) + .copy_to(table.table.ctrl(table.table.num_buckets()), Group::WIDTH); + } + drop(table); + } + } + + /// CHECKING THAT WE DON'T TRY TO DROP DATA IF THE `ITEMS` + /// ARE ZERO, EVEN IF WE HAVE `FULL` CONTROL BYTES. 
+ #[test] + #[cfg(panic = "unwind")] + fn test_catch_panic_clone_from() { + use super::{AllocError, Allocator, Global}; + use core::sync::atomic::{AtomicI8, Ordering}; + use std::sync::Arc; + use std::thread; + use std::vec::Vec; + + struct MyAllocInner { + drop_count: Arc, + } + + #[derive(Clone)] + struct MyAlloc { + _inner: Arc, + } + + impl Drop for MyAllocInner { + fn drop(&mut self) { + println!("MyAlloc freed."); + self.drop_count.fetch_sub(1, Ordering::SeqCst); + } + } + + unsafe impl Allocator for MyAlloc { + fn allocate(&self, layout: Layout) -> std::result::Result, AllocError> { + let g = Global; + g.allocate(layout) + } + + unsafe fn deallocate(&self, ptr: NonNull, layout: Layout) { + unsafe { + let g = Global; + g.deallocate(ptr, layout); + } + } + } + + const DISARMED: bool = false; + const ARMED: bool = true; + + struct CheckedCloneDrop { + panic_in_clone: bool, + dropped: bool, + need_drop: Vec, + } + + impl Clone for CheckedCloneDrop { + fn clone(&self) -> Self { + if self.panic_in_clone { + panic!("panic in clone") + } + Self { + panic_in_clone: self.panic_in_clone, + dropped: self.dropped, + need_drop: self.need_drop.clone(), + } + } + } + + impl Drop for CheckedCloneDrop { + fn drop(&mut self) { + if self.dropped { + panic!("double drop"); + } + self.dropped = true; + } + } + + let dropped: Arc = Arc::new(AtomicI8::new(2)); + + let mut table = RawTable::new_in(MyAlloc { + _inner: Arc::new(MyAllocInner { + drop_count: dropped.clone(), + }), + }); + + for (idx, panic_in_clone) in core::iter::repeat_n(DISARMED, 7).enumerate() { + let idx = idx as u64; + table.insert( + idx, + ( + idx, + CheckedCloneDrop { + panic_in_clone, + dropped: false, + need_drop: vec![idx], + }, + ), + |(k, _)| *k, + ); + } + + assert_eq!(table.len(), 7); + + thread::scope(|s| { + let result = s.spawn(|| { + let armed_flags = [ + DISARMED, DISARMED, ARMED, DISARMED, DISARMED, DISARMED, DISARMED, + ]; + let mut scope_table = RawTable::new_in(MyAlloc { + _inner: 
Arc::new(MyAllocInner { + drop_count: dropped.clone(), + }), + }); + for (idx, &panic_in_clone) in armed_flags.iter().enumerate() { + let idx = idx as u64; + scope_table.insert( + idx, + ( + idx, + CheckedCloneDrop { + panic_in_clone, + dropped: false, + need_drop: vec![idx + 100], + }, + ), + |(k, _)| *k, + ); + } + table.clone_from(&scope_table); + }); + assert!(result.join().is_err()); + }); + + // Let's check that all iterators work fine and do not return elements + // (especially `RawIterRange`, which does not depend on the number of + // elements in the table, but looks directly at the control bytes) + // + // SAFETY: We know for sure that `RawTable` will outlive + // the returned `RawIter / RawIterRange` iterator. + assert_eq!(table.len(), 0); + assert_eq!(unsafe { table.iter().count() }, 0); + assert_eq!(unsafe { table.iter().iter.count() }, 0); + + for idx in 0..table.num_buckets() { + let idx = idx as u64; + assert!( + table + .find(idx, |(k, _)| Ok::<_, ()>(*k == idx)) + .unwrap() + .is_none(), + "Index: {idx}" + ); + } + + // All allocator clones should already be dropped. 
+ assert_eq!(dropped.load(Ordering::SeqCst), 1); + } +} diff --git a/src/hashbrown/scopeguard.rs b/src/hashbrown/scopeguard.rs new file mode 100644 index 0000000..26532b8 --- /dev/null +++ b/src/hashbrown/scopeguard.rs @@ -0,0 +1,72 @@ +// Extracted from the scopeguard crate +use core::{ + mem::ManuallyDrop, + ops::{Deref, DerefMut}, + ptr, +}; + +pub(crate) struct ScopeGuard +where + F: FnMut(&mut T), +{ + dropfn: F, + value: T, +} + +#[inline] +pub(crate) fn guard(value: T, dropfn: F) -> ScopeGuard +where + F: FnMut(&mut T), +{ + ScopeGuard { dropfn, value } +} + +impl ScopeGuard +where + F: FnMut(&mut T), +{ + #[inline] + pub(crate) fn into_inner(guard: Self) -> T { + // Cannot move out of Drop-implementing types, so + // ptr::read the value out of a ManuallyDrop + // Don't use mem::forget as that might invalidate value + let guard = ManuallyDrop::new(guard); + unsafe { + let value = ptr::read(&raw const guard.value); + // read the closure so that it is dropped + let _ = ptr::read(&raw const guard.dropfn); + value + } + } +} + +impl Deref for ScopeGuard +where + F: FnMut(&mut T), +{ + type Target = T; + #[inline] + fn deref(&self) -> &T { + &self.value + } +} + +impl DerefMut for ScopeGuard +where + F: FnMut(&mut T), +{ + #[inline] + fn deref_mut(&mut self) -> &mut T { + &mut self.value + } +} + +impl Drop for ScopeGuard +where + F: FnMut(&mut T), +{ + #[inline] + fn drop(&mut self) { + (self.dropfn)(&mut self.value); + } +} diff --git a/src/hashbrown/util.rs b/src/hashbrown/util.rs new file mode 100644 index 0000000..880913b --- /dev/null +++ b/src/hashbrown/util.rs @@ -0,0 +1,9 @@ +pub(crate) use std::hint::likely; +pub(crate) use std::hint::unlikely; + +// FIXME: use strict provenance functions once they are stable. +// Implement it with a transmute for now. 
+#[inline(always)] +pub(crate) fn invalid_mut(addr: usize) -> *mut T { + unsafe { core::mem::transmute(addr) } +} diff --git a/src/internal/alias.rs b/src/internal/alias.rs new file mode 100644 index 0000000..cb2ac4f --- /dev/null +++ b/src/internal/alias.rs @@ -0,0 +1,13 @@ +/// Type alias for `pyo3::Py` +pub type PyObject = pyo3::Py; + +/// Type alias for `pyo3::Bound<'a, pyo3::PyAny>` +pub type BoundObject<'a> = pyo3::Bound<'a, pyo3::PyAny>; + +/// Type alias for `&'a pyo3::Bound<'a, pyo3::types::PyTuple>`. +/// Use it directly as `args` argument type. +pub type BoundArgs<'a> = &'a pyo3::Bound<'a, pyo3::types::PyTuple>; + +/// Type alias for `&'a pyo3::Bound<'a, pyo3::types::PyDict>`. +/// Use it directly as `kwds` argument type. +pub type BoundKwargs<'a> = &'a pyo3::Bound<'a, pyo3::types::PyDict>; diff --git a/src/internal/genver.rs b/src/internal/genver.rs new file mode 100644 index 0000000..8f161f1 --- /dev/null +++ b/src/internal/genver.rs @@ -0,0 +1,27 @@ +use std::sync::atomic; +use std::sync::Arc; + +/// Generation version implementation +/// +/// Very useful for checking changes while iteration, like what CPython does; +/// because we can't use lifetimes. 
+#[derive(Debug, Clone)] +#[repr(transparent)] +pub struct GenerationVersion(Arc); + +impl GenerationVersion { + #[inline] + pub fn new() -> Self { + Self(Default::default()) + } + + #[inline] + pub fn increment(&self) -> u32 { + self.0.fetch_add(1, atomic::Ordering::SeqCst) + } + + #[inline] + pub fn get(&self) -> u32 { + self.0.load(atomic::Ordering::Relaxed) + } +} diff --git a/src/internal/mod.rs b/src/internal/mod.rs new file mode 100644 index 0000000..c4c32d5 --- /dev/null +++ b/src/internal/mod.rs @@ -0,0 +1,5 @@ +pub mod alias; +pub mod genver; +pub mod onceinit; +pub mod pickle; +pub mod utils; diff --git a/src/internal/onceinit.rs b/src/internal/onceinit.rs new file mode 100644 index 0000000..ae1bde0 --- /dev/null +++ b/src/internal/onceinit.rs @@ -0,0 +1,77 @@ +use std::cell; +use std::mem; +use std::sync::atomic; + +const UNINIT: u8 = 0; +const RUNNING: u8 = 1; +const INIT: u8 = 2; + +#[repr(align(64))] +pub struct OnceInit { + state: atomic::AtomicU8, + value: cell::UnsafeCell>>, +} + +impl OnceInit { + #[inline] + pub fn uninit() -> Self { + Self { + state: atomic::AtomicU8::new(UNINIT), + value: cell::UnsafeCell::new(mem::MaybeUninit::uninit()), + } + } + + #[inline] + pub fn set(&self, val: T) { + if self + .state + .compare_exchange( + UNINIT, + RUNNING, + atomic::Ordering::Acquire, + atomic::Ordering::Relaxed, + ) + .is_err() + { + already_init_panic(); + } + // SAFETY: we own the RUNNING token — no other thread can write value. + unsafe { (*self.value.get()).write(parking_lot::Mutex::new(val)) }; + self.state.store(INIT, atomic::Ordering::Release); + } + + #[inline] + pub fn lock(&self) -> parking_lot::MutexGuard<'_, T> { + if std::hint::likely(self.state.load(atomic::Ordering::Acquire) == INIT) { + // SAFETY: state == INIT guarantees `value` was fully written and is valid. 
+ unsafe { (*self.value.get()).assume_init_ref().lock() } + } else { + not_init_panic() + } + } +} + +// SAFETY: Mutex is Send+Sync when T: Send; we uphold the init invariant ourselves. +unsafe impl Send for OnceInit {} +unsafe impl Sync for OnceInit {} + +impl Drop for OnceInit { + fn drop(&mut self) { + if *self.state.get_mut() == INIT { + // SAFETY: state == INIT means value was written and not yet dropped. + unsafe { (*self.value.get()).assume_init_drop() } + } + } +} + +#[cold] +#[inline(never)] +fn not_init_panic() -> ! { + panic!("Object not initialized (__init__ not called)") +} + +#[cold] +#[inline(never)] +fn already_init_panic() -> ! { + panic!("Object already initialized") +} diff --git a/src/internal/pickle.rs b/src/internal/pickle.rs new file mode 100644 index 0000000..873985a --- /dev/null +++ b/src/internal/pickle.rs @@ -0,0 +1,81 @@ +use std::ptr; + +use crate::internal::alias; + +/// Pickle object +pub struct Pickle( + // Always is tuple + alias::PyObject, +); + +pub struct PickleBuilder { + // Always is tuple + tuple: ptr::NonNull, + size: isize, + current: isize, +} + +impl Pickle { + pub fn builder(py: pyo3::Python, size: isize) -> pyo3::PyResult { + let tuple = unsafe { pyo3::ffi::PyTuple_New(size) }; + + if tuple.is_null() { + Err(pyo3::PyErr::fetch(py)) + } else { + Ok(PickleBuilder { + tuple: unsafe { ptr::NonNull::new_unchecked(tuple) }, + size, + current: 0, + }) + } + } +} + +impl From for alias::PyObject { + fn from(value: Pickle) -> Self { + value.0 + } +} + +impl PickleBuilder { + /// Stores `val` as a Python `int` in the next free slot of the tuple. + pub fn unsigned(&mut self, val: usize) -> &mut Self { + debug_assert!(self.current < self.size); + + unsafe { + let x = pyo3::ffi::PyLong_FromSize_t(val); + debug_assert!(!x.is_null()); + + // `debug_assert!` does not evaluate its argument in release builds, so the + // call that actually stores the item must be made outside of the assertion. + let rc = pyo3::ffi::PyTuple_SetItem(self.tuple.as_ptr(), self.current, x); + debug_assert!(rc == 0); + } + + self.current += 1; + self + } + + /// Stores `val` as a Python `int` in the next free slot of the tuple. + pub fn signed(&mut self, val: isize) -> &mut Self { + debug_assert!(self.current < self.size); + + unsafe { + let x = 
pyo3::ffi::PyLong_FromSsize_t(val); + debug_assert!(!x.is_null()); + + // `debug_assert!` does not evaluate its argument in release builds, so the + // call that actually stores the item must be made outside of the assertion. + let rc = pyo3::ffi::PyTuple_SetItem(self.tuple.as_ptr(), self.current, x); + debug_assert!(rc == 0); + } + + self.current += 1; + self + } + + pub fn finish(self, py: pyo3::Python) -> Pickle { + let bound = unsafe { pyo3::Bound::from_owned_ptr(py, self.tuple.as_ptr()) }; + Pickle(bound.unbind()) + } +} diff --git a/src/internal/utils.rs b/src/internal/utils.rs new file mode 100644 index 0000000..b5dfba5 --- /dev/null +++ b/src/internal/utils.rs @@ -0,0 +1,18 @@ +/// It can use as PyO3 function argument. When an argument is specified, you will get [`OptionalArgument::Defined`], +/// otherwise you will get [`OptionalArgument::Undefined`]. +/// +/// It can be used instead of [`Option`] to improve performance. +pub enum OptionalArgument<'a> { + /// The argument was not provided by the caller. + Undefined, + /// The argument was provided and holds the bound Python object. + Defined(pyo3::Bound<'a, pyo3::PyAny>), +} + +impl<'a, 'py> pyo3::FromPyObject<'a, 'py> for OptionalArgument<'py> { + type Error = pyo3::PyErr; + + fn extract(obj: pyo3::Borrowed<'a, 'py, pyo3::PyAny>) -> Result { + Ok(Self::Defined(obj.to_owned())) + } +} diff --git a/src/lazyheap.rs b/src/lazyheap.rs deleted file mode 100644 index 3e8304e..0000000 --- a/src/lazyheap.rs +++ /dev/null @@ -1,190 +0,0 @@ -use crate::common::NoLifetimeSliceIter; -use std::ptr::NonNull; - -/// A heap data structure that lazily maintains sorting order. -/// -/// `LazyHeap` allows for efficient insertion of elements without immediately sorting, -/// with the ability to defer sorting until necessary. This can improve performance -/// in scenarios where sorting is not immediately required. -/// -/// ``` -/// let mut heap = LazyHeap::new(); -/// heap.push(5); -/// ``` -pub struct LazyHeap { - data: std::collections::VecDeque>, - is_sorted: bool, -} - -/// An iterator for traversing elements in a `LazyHeap`. 
-/// -/// This iterator allows sequential access to the elements of a `LazyHeap`, -/// maintaining the current position and total length during iteration. -/// -/// # Safety -/// -/// This iterator uses raw pointers and requires careful management to ensure -/// memory safety and prevent use-after-free or dangling pointer scenarios. -pub struct Iter { - first: NoLifetimeSliceIter>, - second: NoLifetimeSliceIter>, -} - -impl LazyHeap { - pub fn new() -> Self { - Self { - data: std::collections::VecDeque::new(), - is_sorted: true, - } - } - - #[inline] - pub fn queue_sort(&mut self) { - self.is_sorted = false; - } - - #[inline] - pub fn front(&self) -> Option<&NonNull> { - debug_assert!(self.is_sorted, "heap not sorted"); - self.data.front() - } - - #[inline] - pub fn push(&mut self, value: T) -> NonNull { - unsafe { - let node: NonNull = NonNull::new_unchecked(Box::into_raw(Box::new(value))).cast(); - - self.data.push_back(node); - self.is_sorted = false; - - node - } - } - - #[inline] - pub fn sort_by(&mut self, compare: impl Fn(&T, &T) -> std::cmp::Ordering) { - if self.is_sorted { - return; - } - - if self.data.len() > 1 { - unsafe { - self.data - .make_contiguous() - .sort_by(|a, b| compare(a.as_ref(), b.as_ref())); - } - } - - self.is_sorted = true; - } - - #[inline] - fn unlink_front(&mut self) -> Option { - let node = self.data.pop_front()?; - let node = unsafe { Box::from_raw(node.as_ptr()) }; - Some(*node) - } - - #[inline] - pub fn pop_front(&mut self, compare: impl Fn(&T, &T) -> std::cmp::Ordering) -> Option { - self.sort_by(compare); - self.unlink_front() - } - - #[inline] - fn unlink_back(&mut self) -> Option { - let node = self.data.pop_back()?; - let node = unsafe { Box::from_raw(node.as_ptr()) }; - Some(*node) - } - - #[inline] - pub fn pop_back(&mut self, compare: impl Fn(&T, &T) -> std::cmp::Ordering) -> Option { - self.sort_by(compare); - self.unlink_back() - } - - #[inline] - pub fn get(&self, index: usize) -> Option<&NonNull> { - 
self.data.get(index) - } - - #[inline] - pub fn remove(&mut self, node: NonNull, compare: F) -> T - where - F: Fn(&T, &T) -> std::cmp::Ordering, - { - debug_assert!(!self.data.is_empty()); - - if self.data.len() == 1 { - return self.pop_back(compare).unwrap(); - } - - self.sort_by(compare); - - let index = self.data.iter().position(|x| node == *x).unwrap(); - - let node = unsafe { self.data.remove(index).unwrap_unchecked() }; - let boxed_node = unsafe { Box::from_raw(node.as_ptr()) }; - *boxed_node - } - - #[inline] - pub fn clear(&mut self) { - while self.unlink_back().is_some() {} - self.is_sorted = true; - } - - #[inline] - pub fn shrink_to_fit(&mut self) { - self.data.shrink_to_fit(); - } - - #[inline] - pub fn iter(&mut self, compare: impl Fn(&T, &T) -> std::cmp::Ordering) -> Iter { - self.sort_by(compare); - - let (a, b) = self.data.as_slices(); - - Iter { - first: NoLifetimeSliceIter::new(a), - second: NoLifetimeSliceIter::new(b), - } - } -} - -impl Drop for LazyHeap { - fn drop(&mut self) { - struct DropGuard<'a, T>(&'a mut LazyHeap); - - impl Drop for DropGuard<'_, T> { - fn drop(&mut self) { - // Continue the same loop we do below. This only runs when a destructor has - // panicked. If another one panics this will abort. 
- while self.0.unlink_back().is_some() {} - } - } - - // Wrap self so that if a destructor panics, we can try to keep looping - let guard = DropGuard(self); - while guard.0.unlink_back().is_some() {} - core::mem::forget(guard); - } -} - -impl Iterator for Iter { - type Item = NonNull; - - #[inline] - fn next(&mut self) -> Option { - match self.first.next() { - Some(val) => Some(unsafe { *val.as_ptr() }), - None => { - core::mem::swap(&mut self.first, &mut self.second); - self.first.next().map(|x| unsafe { *x.as_ptr() }) - } - } - } -} - -unsafe impl Send for Iter {} diff --git a/src/lib.rs b/src/lib.rs index bff475d..5e3ead1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,58 +1,28 @@ -mod lazyheap; -mod linked_list; +#![feature(allocator_api)] +#![feature(dropck_eyepatch)] +#![feature(likely_unlikely)] +#![feature(optimize_attribute)] #[macro_use] -mod common; +mod macro_rules; -mod bridge; -mod policies; +pub mod hashbrown; +pub mod internal; +pub mod pyclasses; -#[cfg(feature = "mimalloc")] -#[global_allocator] -static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; - -/// cachebox core ( written in Rust ) -#[pyo3::pymodule(gil_used = false)] +#[pyo3::pymodule] mod _core { - use pyo3::types::PyModuleMethods; - - #[pymodule_export] - use super::bridge::TTLPair; - - #[pymodule_export] - use super::bridge::BaseCacheImpl; - - #[pymodule_export] - use super::bridge::cache::Cache; - - #[pymodule_export] - use super::bridge::fifocache::FIFOCache; + // use crate::typeref; - #[pymodule_export] - use super::bridge::rrcache::RRCache; + // #[pymodule_export] + // use crate::pyclasses::base::{PyBaseCacheImpl, PyBaseIteratorImpl}; - #[pymodule_export] - use super::bridge::lrucache::LRUCache; - - #[pymodule_export] - use super::bridge::lfucache::LFUCache; - - #[pymodule_export] - use super::bridge::ttlcache::TTLCache; - - #[pymodule_export] - use super::bridge::vttlcache::VTTLCache; + // #[pymodule_export] + // use crate::pyclasses::cache::{PyCache, PyCacheItems, PyCacheKeys, 
PyCacheValues}; #[pymodule_init] - fn init(m: &pyo3::Bound<'_, pyo3::types::PyModule>) -> pyo3::PyResult<()> { - m.add("__author__", env!("CARGO_PKG_AUTHORS"))?; - m.add("__version__", env!("CARGO_PKG_VERSION"))?; - - m.add( - "CoreKeyError", - m.py().get_type::(), - )?; - + pub fn init(_m: &pyo3::Bound<'_, pyo3::types::PyModule>) -> pyo3::PyResult<()> { + // typeref::initialize_typeref(m.py()); Ok(()) } } diff --git a/src/linked_list.rs b/src/linked_list.rs deleted file mode 100644 index 6c78280..0000000 --- a/src/linked_list.rs +++ /dev/null @@ -1,206 +0,0 @@ -use crate::common::PreHashObject; -use std::ptr::NonNull; - -/// A doubly-linked list implementation with optional head and tail nodes. -/// -/// This list maintains references to the first and last nodes, and tracks the total number of elements. -/// Uses `NonNull` pointers for efficient memory management and allows for constant-time -/// insertion and deletion at both ends of the list. -pub struct LinkedList { - pub head: Option>, // front - pub tail: Option>, // back - len: usize, -} - -/// A node in a doubly-linked list, containing a reference to the previous and next nodes, -/// and storing a key-value pair as its element. -/// -/// The node uses `NonNull` pointers for efficient memory management and allows for -/// constant-time insertion and deletion in the linked list. 
-pub struct Node { - pub prev: Option>, - pub next: Option>, - pub element: (PreHashObject, pyo3::Py, usize), -} - -impl LinkedList { - pub fn new() -> Self { - Self { - head: None, - tail: None, - len: 0, - } - } - - #[inline] - pub fn push_back( - &mut self, - key: PreHashObject, - val: pyo3::Py, - size: usize, - ) -> NonNull { - unsafe { - let node = NonNull::new_unchecked(Box::into_raw(Box::new(Node { - prev: None, - next: None, - element: (key, val, size), - }))); - - if let Some(old) = self.tail { - (*old.as_ptr()).next = Some(node); - (*node.as_ptr()).prev = Some(old); - } else { - // means list is empty, so this node is also can be the front of list - debug_assert!(self.head.is_none(), "head is not None"); - self.head = Some(node); - } - - self.tail = Some(node); - self.len += 1; - node - } - } - - #[inline] - pub fn pop_front(&mut self) -> Option<(PreHashObject, pyo3::Py, usize)> { - unsafe { - self.head.map(|node| { - let boxed_node = Box::from_raw(node.as_ptr()); - debug_assert!(boxed_node.prev.is_none(), "head.prev is not None"); - - self.head = boxed_node.next; - - match self.head { - None => self.tail = None, - // Not creating new mutable (unique!) references overlapping `element`. 
- Some(head) => (*head.as_ptr()).prev = None, - } - - debug_assert!(self.len > 0, "self.len is zero"); - self.len -= 1; - boxed_node.element - }) - } - } - - #[inline] - pub fn clear(&mut self) { - while self.pop_front().is_some() {} - } - - #[inline] - pub unsafe fn remove( - &mut self, - node: NonNull, - ) -> (PreHashObject, pyo3::Py, usize) { - let node = Box::from_raw(node.as_ptr()); - let result = node.element; - - match node.next { - Some(next) => (*next.as_ptr()).prev = node.prev, - None => { - // Means this node is our self.tail - self.tail = node.prev; - } - } - - match node.prev { - Some(prev) => (*prev.as_ptr()).next = node.next, - None => { - // Means this node is our self.head - self.head = node.next; - } - } - - self.len -= 1; - result - } - - #[inline] - pub unsafe fn move_back(&mut self, node: NonNull) { - if (*node.as_ptr()).next.is_none() { - // Means this node is our self.tail - return; - } - - // unlink - match (*node.as_ptr()).next { - Some(next) => (*next.as_ptr()).prev = (*node.as_ptr()).prev, - None => std::hint::unreachable_unchecked(), - } - - match (*node.as_ptr()).prev { - Some(prev) => (*prev.as_ptr()).next = (*node.as_ptr()).next, - None => { - // Means this node is our self.head - self.head = (*node.as_ptr()).next; - } - } - - (*node.as_ptr()).next = None; - (*node.as_ptr()).prev = None; - - // push_back again - if let Some(old) = self.tail { - (*old.as_ptr()).next = Some(node); - (*node.as_ptr()).prev = Some(old); - } else { - // means list is empty, so this node is also can be the front of list - debug_assert!(self.head.is_none(), "head is not None"); - self.head = Some(node); - } - - self.tail = Some(node); - } - - #[inline] - pub fn iter(&self) -> Iter { - Iter { - head: self.head, - len: self.len, - } - } -} - -pub struct Iter { - head: Option>, - len: usize, -} - -impl Iterator for Iter { - type Item = NonNull; - - #[inline] - fn next(&mut self) -> Option { - if self.len == 0 { - None - } else { - self.head.inspect(|node| unsafe 
{ - self.len -= 1; - self.head = (*node.as_ptr()).next; - }) - } - } -} - -impl Drop for LinkedList { - fn drop(&mut self) { - struct DropGuard<'a>(&'a mut LinkedList); - - impl Drop for DropGuard<'_> { - fn drop(&mut self) { - // Continue the same loop we do below. This only runs when a destructor has - // panicked. If another one panics this will abort. - while self.0.pop_front().is_some() {} - } - } - - // Wrap self so that if a destructor panics, we can try to keep looping - let guard = DropGuard(self); - while guard.0.pop_front().is_some() {} - core::mem::forget(guard); - } -} - -unsafe impl Sync for Iter {} -unsafe impl Send for Iter {} diff --git a/python/tests/__init__.py b/src/macro_rules.rs similarity index 100% rename from python/tests/__init__.py rename to src/macro_rules.rs diff --git a/src/policies/fifo.rs b/src/policies/fifo.rs deleted file mode 100644 index 81ac15e..0000000 --- a/src/policies/fifo.rs +++ /dev/null @@ -1,497 +0,0 @@ -use crate::common::Entry; -use crate::common::NoLifetimeSliceIter; -use crate::common::Observed; -use crate::common::PreHashObject; -use crate::common::TryFindMethods; - -use std::collections::VecDeque; - -pub const MAX_N_SHIFT: usize = usize::MAX - (isize::MAX as usize); - -pub struct FIFOPolicy { - /// We set [Vec] objects indexes in hashtable to make search O(1). hashtable is unordered, - /// that is why we are using [Vec]. - table: hashbrown::raw::RawTable, - - /// Keep objects in order. - entries: VecDeque<(PreHashObject, pyo3::Py, usize)>, - maxsize: core::num::NonZeroUsize, - maxmemory: core::num::NonZeroUsize, - memory: usize, - - /// When we pop front an object from entries, two operations have to do: - /// 1. Shift all elements in vector. - /// 2. Decrement all indexes in hashtable. 
- /// - /// these are expensive operations in large elements; - /// - We removed first operation by using [`std::collections::VecDeque`] instead of [`Vec`] - /// - We removed second operation by using this variable: Instead of decrement indexes in hashtable, - /// we will increment this variable. - n_shifts: usize, - - pub observed: Observed, -} - -pub struct FIFOPolicyOccupied<'a> { - instance: &'a mut FIFOPolicy, - bucket: hashbrown::raw::Bucket, -} - -pub struct FIFOPolicyAbsent<'a> { - instance: &'a mut FIFOPolicy, - insert_slot: Option, -} - -pub struct FIFOIterator { - first: NoLifetimeSliceIter<(PreHashObject, pyo3::Py, usize)>, - second: NoLifetimeSliceIter<(PreHashObject, pyo3::Py, usize)>, -} - -impl FIFOPolicy { - pub fn new(maxsize: usize, mut capacity: usize, maxmemory: usize) -> pyo3::PyResult { - let maxsize = non_zero_or!(maxsize, isize::MAX as usize); - let maxmemory = non_zero_or!(maxmemory, isize::MAX as usize); - capacity = capacity.min(maxsize.get()); - - Ok(Self { - table: new_table!(capacity)?, - entries: VecDeque::new(), - maxsize, - maxmemory, - memory: 0, - n_shifts: 0, - observed: Observed::new(), - }) - } - - pub fn maxsize(&self) -> usize { - self.maxsize.get() - } - - pub fn maxmemory(&self) -> usize { - self.maxmemory.get() - } - - pub fn memory(&self) -> usize { - self.memory - } - - #[inline] - pub fn len(&self) -> usize { - self.table.len() - } - - #[inline] - pub fn is_empty(&self) -> bool { - self.table.is_empty() - } - - pub fn is_full(&self) -> bool { - self.table.len() == self.maxsize.get() || self.memory >= self.maxmemory.get() - } - - pub fn capacity(&self) -> (usize, usize) { - (self.table.capacity(), self.entries.capacity()) - } - - #[inline] - fn decrement_indexes(&mut self, start: usize, end: usize) { - if start <= 1 && end == self.entries.len() && self.n_shifts < MAX_N_SHIFT { - self.n_shifts += 1; - return; - } - - if (end - start) > self.table.buckets() / 2 { - unsafe { - for bucket in self.table.iter() { - let i = 
bucket.as_mut(); - if start <= (*i) - self.n_shifts && (*i) - self.n_shifts < end { - *i -= 1; - } - } - } - } else { - let shifted = self.entries.range(start..end); - for (i, entry) in (start..end).zip(shifted) { - let old = self - .table - .get_mut(entry.0.hash, |x| (*x) - self.n_shifts == i) - .expect("index not found"); - - *old -= 1; - } - } - } - - #[inline] - pub fn popitem( - &mut self, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult, usize)>> { - let ret = self.entries.front(); - if ret.is_none() { - return Ok(None); - } - - let ret = unsafe { ret.unwrap_unchecked() }; - - match self.table.try_find(ret.0.hash, |x| { - self.entries[(*x) - self.n_shifts].0.equal(py, &ret.0) - })? { - Some(bucket) => { - unsafe { self.table.remove(bucket) }; - } - None => unreachable!("popitem key not found in table"), - } - - let ret = unsafe { self.entries.pop_front().unwrap_unchecked() }; - self.memory = self.memory.saturating_sub(ret.2); - - self.observed.change(); - - self.decrement_indexes(1, self.entries.len()); - Ok(Some(ret)) - } - - #[inline] - #[rustfmt::skip] - pub fn entry( - &mut self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult, FIFOPolicyAbsent<'_>>> { - match self - .table - .try_find(key.hash, |x| self.entries[(*x) - self.n_shifts].0.equal(py, key))? - { - Some(bucket) => { - Ok( - Entry::Occupied(FIFOPolicyOccupied { instance: self, bucket }) - ) - } - None => { - Ok( - Entry::Absent(FIFOPolicyAbsent { instance: self, insert_slot: None }) - ) - }, - } - } - - #[inline] - #[rustfmt::skip] - pub fn entry_with_slot( - &'_ mut self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult, FIFOPolicyAbsent<'_>>> { - match self.table.try_find_or_find_insert_slot( - key.hash, - |x| self.entries[(*x) - self.n_shifts].0.equal(py, key), - |x| self.entries[(*x) - self.n_shifts].0.hash, - )? 
{ - Ok(bucket) => Ok( - Entry::Occupied(FIFOPolicyOccupied { instance: self, bucket }) - ), - Err(insert_slot) => Ok( - Entry::Absent(FIFOPolicyAbsent { instance: self, insert_slot: Some(insert_slot) }) - ), - } - } - - #[inline] - pub fn lookup( - &self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult>> { - match self - .table - .try_find(key.hash, |x| { - self.entries[(*x) - self.n_shifts].0.equal(py, key) - })? - .map(|bucket| unsafe { bucket.as_ref() }) - { - Some(index) => Ok(Some(&self.entries[(*index) - self.n_shifts].1)), - None => Ok(None), - } - } - - pub fn clear(&mut self) { - self.table.clear(); - self.entries.clear(); - self.n_shifts = 0; - self.memory = 0; - self.observed.change(); - } - - pub fn shrink_to_fit(&mut self) { - self.table.shrink_to(self.table.len(), |x| { - self.entries[(*x) - self.n_shifts].0.hash - }); - self.entries.shrink_to_fit(); - self.observed.change(); - } - - pub fn entries_iter( - &self, - ) -> std::collections::vec_deque::Iter<'_, (PreHashObject, pyo3::Py, usize)> { - self.entries.iter() - } - - pub fn equal(&self, py: pyo3::Python<'_>, other: &Self) -> pyo3::PyResult { - if self.maxsize != other.maxsize { - return Ok(false); - } - - if self.maxmemory != other.maxmemory { - return Ok(false); - } - - if self.table.len() != other.table.len() { - return Ok(false); - } - - unsafe { - for index1 in self.table.iter().map(|x| x.as_ref()) { - let (key1, value1, _) = &self.entries[(*index1) - self.n_shifts]; - - match other.table.try_find(key1.hash, |x| { - key1.equal(py, &other.entries[(*x) - other.n_shifts].0) - })? { - Some(bucket) => { - let (_, value2, _) = &other.entries[(*bucket.as_ref()) - other.n_shifts]; - - if !crate::common::pyobject_equal(py, value1.as_ptr(), value2.as_ptr())? 
{ - return Ok(false); - } - } - None => return Ok(false), - } - } - } - - Ok(true) - } - - #[inline] - pub fn extend( - &mut self, - py: pyo3::Python<'_>, - iterable: pyo3::Py, - ) -> pyo3::PyResult<()> { - use pyo3::types::{PyAnyMethods, PyDictMethods}; - - if unsafe { pyo3::ffi::PyDict_CheckExact(iterable.as_ptr()) == 1 } { - let dict = unsafe { iterable.cast_bound_unchecked::(py) }; - - for (key, value) in dict.iter() { - let hk = - unsafe { PreHashObject::from_pyobject(py, key.unbind()).unwrap_unchecked() }; - - match self.entry_with_slot(py, &hk)? { - Entry::Occupied(entry) => { - entry.update(py, value.unbind())?; - } - Entry::Absent(entry) => { - entry.insert(py, hk, value.unbind())?; - } - } - } - } else { - for pair in iterable.bind(py).try_iter()? { - let (key, value) = - pair?.extract::<(pyo3::Py, pyo3::Py)>()?; - - let hk = PreHashObject::from_pyobject(py, key)?; - - match self.entry_with_slot(py, &hk)? { - Entry::Occupied(entry) => { - entry.update(py, value)?; - } - Entry::Absent(entry) => { - entry.insert(py, hk, value)?; - } - } - } - } - - Ok(()) - } - - pub fn iter(&self) -> FIFOIterator { - let (a, b) = self.entries.as_slices(); - - FIFOIterator { - first: NoLifetimeSliceIter::new(a), - second: NoLifetimeSliceIter::new(b), - } - } - - #[allow(clippy::wrong_self_convention)] - pub fn from_pickle( - &mut self, - py: pyo3::Python<'_>, - state: *mut pyo3::ffi::PyObject, - ) -> pyo3::PyResult<()> { - use pyo3::types::PyAnyMethods; - - unsafe { - let (maxsize, iterable, capacity, maxmemory) = extract_pickle_tuple!(py, state => list); - - let mut new = Self::new(maxsize, capacity, maxmemory)?; - - for pair in iterable.bind(py).try_iter()? { - let (key, value) = - pair?.extract::<(pyo3::Py, pyo3::Py)>()?; - - let hk = PreHashObject::from_pyobject(py, key)?; - - match new.entry_with_slot(py, &hk)? 
{ - Entry::Absent(entry) => { - entry.insert(py, hk, value)?; - } - _ => std::hint::unreachable_unchecked(), - } - } - - *self = new; - Ok(()) - } - } - - #[inline(always)] - pub fn get_index(&self, n: usize) -> Option<&(PreHashObject, pyo3::Py, usize)> { - self.entries.get(n) - } -} - -impl<'a> FIFOPolicyOccupied<'a> { - #[inline] - pub fn update( - self, - py: pyo3::Python<'_>, - value: pyo3::Py, - ) -> pyo3::PyResult> { - let index = unsafe { self.bucket.as_ref() }; - let old_value; - { - let item = &mut self.instance.entries[index - self.instance.n_shifts]; - let new_size = crate::common::entry_size(py, &item.0, &value)?; - - if new_size > self.instance.maxmemory.get() { - return Err(pyo3::PyErr::new::( - "The cache has reached the bound", - )); - } - - let old_size = item.2; - old_value = std::mem::replace(&mut item.1, value); - item.2 = new_size; - self.instance.memory = self - .instance - .memory - .saturating_sub(old_size) - .saturating_add(new_size); - } - - // In update we don't need to change this; because this does not change the memory address ranges - // self.instance.observed.change(); - - while self.instance.memory > self.instance.maxmemory.get() { - if self.instance.popitem(py)?.is_none() { - break; - } - } - - Ok(old_value) - } - - #[inline] - pub fn remove(self) -> (PreHashObject, pyo3::Py, usize) { - let (mut index, _) = unsafe { self.instance.table.remove(self.bucket) }; - index -= self.instance.n_shifts; - - self.instance - .decrement_indexes(index + 1, self.instance.entries.len()); - - let m = self.instance.entries.remove(index).unwrap(); - self.instance.memory = self.instance.memory.saturating_sub(m.2); - - self.instance.observed.change(); - m - } - - pub fn into_value(self) -> &'a mut (PreHashObject, pyo3::Py, usize) { - let index = unsafe { self.bucket.as_ref() }; - &mut self.instance.entries[index - self.instance.n_shifts] - } -} - -impl FIFOPolicyAbsent<'_> { - #[inline] - pub fn insert( - self, - py: pyo3::Python<'_>, - key: 
PreHashObject, - value: pyo3::Py, - ) -> pyo3::PyResult<()> { - let entry_size = crate::common::entry_size(py, &key, &value)?; - if entry_size > self.instance.maxmemory.get() { - return Err(pyo3::PyErr::new::( - "The cache has reached the bound", - )); - } - - while self.instance.table.len() >= self.instance.maxsize.get() - || self.instance.memory.saturating_add(entry_size) > self.instance.maxmemory.get() - { - if self.instance.popitem(py)?.is_none() { - break; - } - } - - match self.insert_slot { - Some(slot) => unsafe { - self.instance.table.insert_in_slot( - key.hash, - slot, - self.instance.entries.len() + self.instance.n_shifts, - ); - }, - None => { - self.instance.table.insert( - key.hash, - self.instance.entries.len() + self.instance.n_shifts, - |index| { - self.instance.entries[(*index) - self.instance.n_shifts] - .0 - .hash - }, - ); - } - } - - self.instance.entries.push_back((key, value, entry_size)); - self.instance.memory = self.instance.memory.saturating_add(entry_size); - - self.instance.observed.change(); - Ok(()) - } -} - -impl Iterator for FIFOIterator { - type Item = std::ptr::NonNull<(PreHashObject, pyo3::Py, usize)>; - - #[inline] - fn next(&mut self) -> Option { - match self.first.next() { - Some(val) => Some(val), - None => { - core::mem::swap(&mut self.first, &mut self.second); - self.first.next() - } - } - } -} - -unsafe impl Send for FIFOIterator {} diff --git a/src/policies/lfu.rs b/src/policies/lfu.rs deleted file mode 100644 index 9b57699..0000000 --- a/src/policies/lfu.rs +++ /dev/null @@ -1,428 +0,0 @@ -use crate::common::Entry; -use crate::common::Observed; -use crate::common::PreHashObject; -use crate::common::TryFindMethods; -use crate::lazyheap; -use std::ptr::NonNull; - -type TupleValue = (PreHashObject, pyo3::Py, usize, usize); - -pub struct LFUPolicy { - table: hashbrown::raw::RawTable>, - heap: lazyheap::LazyHeap, - maxsize: std::num::NonZeroUsize, - maxmemory: std::num::NonZeroUsize, - memory: usize, - pub observed: 
Observed, -} - -pub struct LFUPolicyOccupied<'a> { - instance: &'a mut LFUPolicy, - bucket: hashbrown::raw::Bucket>, -} - -pub struct LFUPolicyAbsent<'a> { - instance: &'a mut LFUPolicy, - insert_slot: Option, -} - -pub type LFUIterator = lazyheap::Iter<(PreHashObject, pyo3::Py, usize, usize)>; - -impl LFUPolicy { - pub fn new(maxsize: usize, mut capacity: usize, maxmemory: usize) -> pyo3::PyResult { - let maxsize = non_zero_or!(maxsize, isize::MAX as usize); - let maxmemory = non_zero_or!(maxmemory, isize::MAX as usize); - capacity = capacity.min(maxsize.get()); - - Ok(Self { - table: new_table!(capacity)?, - heap: lazyheap::LazyHeap::new(), - maxsize, - maxmemory, - memory: 0, - observed: Observed::new(), - }) - } - - pub fn maxsize(&self) -> usize { - self.maxsize.get() - } - - pub fn maxmemory(&self) -> usize { - self.maxmemory.get() - } - - pub fn memory(&self) -> usize { - self.memory - } - - #[inline] - pub fn len(&self) -> usize { - self.table.len() - } - - #[inline] - pub fn is_empty(&self) -> bool { - self.table.is_empty() - } - - pub fn is_full(&self) -> bool { - self.table.len() == self.maxsize.get() || self.memory >= self.maxmemory.get() - } - - pub fn capacity(&self) -> usize { - self.table.capacity() - } - - #[inline] - pub fn popitem(&mut self) -> Option { - self.heap.sort_by(|a, b| a.2.cmp(&b.2)); - let front = self.heap.front()?; - - unsafe { - self.table - .remove_entry(front.as_ref().0.hash, |x| { - std::ptr::eq(x.as_ptr(), front.as_ptr()) - }) - .unwrap(); - } - - self.observed.change(); - let item = self.heap.pop_front(|a, b| a.2.cmp(&b.2)).unwrap(); - self.memory = self.memory.saturating_sub(item.3); - Some(item) - } - - #[inline] - #[rustfmt::skip] - pub fn entry( - &'_ mut self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult, LFUPolicyAbsent<'_>>> { - match self - .table - .try_find(key.hash, |ptr| unsafe { ptr.as_ref().0.equal(py, key) })? 
- { - Some(bucket) => { - Ok( - Entry::Occupied(LFUPolicyOccupied { instance: self, bucket }) - ) - }, - None => { - Ok( - Entry::Absent(LFUPolicyAbsent { instance: self, insert_slot: None }) - ) - } - } - } - - #[inline] - #[rustfmt::skip] - pub fn entry_with_slot( - &'_ mut self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult, LFUPolicyAbsent<'_>>> { - match self.table.try_find_or_find_insert_slot( - key.hash, - |ptr| unsafe { ptr.as_ref().0.equal(py, key) }, - |ptr| unsafe { ptr.as_ref().0.hash }, - )? { - Ok(bucket) => { - Ok( - Entry::Occupied(LFUPolicyOccupied { instance: self, bucket }) - ) - }, - Err(slot) => { - Ok( - Entry::Absent(LFUPolicyAbsent { instance: self, insert_slot: Some(slot) }) - ) - } - } - } - - #[inline] - pub fn lookup( - &mut self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult>> { - match self.entry(py, key)? { - Entry::Occupied(x) => unsafe { - x.bucket.as_mut().as_mut().2 += 1; - x.instance.heap.queue_sort(); - - Ok(Some(&x.bucket.as_ref().as_ref().1)) - }, - Entry::Absent(_) => Ok(None), - } - } - - pub fn peek( - &self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult>> { - let result = self - .table - .try_find(key.hash, |x| unsafe { x.as_ref().0.equal(py, key) })? 
- .map(|x| unsafe { &x.as_ref().as_ref().1 }); - - Ok(result) - } - - pub fn clear(&mut self) { - self.table.clear(); - self.heap.clear(); - self.memory = 0; - self.observed.change(); - } - - pub fn shrink_to_fit(&mut self) { - self.table - .shrink_to(self.table.len(), |x| unsafe { x.as_ref().0.hash }); - - self.heap.shrink_to_fit(); - self.observed.change(); - } - - pub fn equal(&self, py: pyo3::Python<'_>, other: &Self) -> pyo3::PyResult { - if self.maxsize != other.maxsize { - return Ok(false); - } - - if self.maxmemory != other.maxmemory { - return Ok(false); - } - - if self.table.len() != other.table.len() { - return Ok(false); - } - - unsafe { - for node in self.table.iter().map(|x| x.as_ref()) { - let (key1, value1, _, _) = node.as_ref(); - - match other - .table - .try_find(key1.hash, |x| key1.equal(py, &x.as_ref().0))? - { - Some(bucket) => { - let (_, value2, _, _) = bucket.as_ref().as_ref(); - - if !crate::common::pyobject_equal(py, value1.as_ptr(), value2.as_ptr())? { - return Ok(false); - } - } - None => return Ok(false), - } - } - } - - Ok(true) - } - - #[inline] - pub fn extend( - &mut self, - py: pyo3::Python<'_>, - iterable: pyo3::Py, - ) -> pyo3::PyResult<()> { - use pyo3::types::{PyAnyMethods, PyDictMethods}; - - if unsafe { pyo3::ffi::PyDict_CheckExact(iterable.as_ptr()) == 1 } { - let dict = unsafe { iterable.cast_bound_unchecked::(py) }; - - for (key, value) in dict.iter() { - let hk = - unsafe { PreHashObject::from_pyobject(py, key.unbind()).unwrap_unchecked() }; - - match self.entry_with_slot(py, &hk)? { - Entry::Occupied(entry) => { - entry.update(py, value.unbind())?; - } - Entry::Absent(entry) => { - entry.insert(py, hk, value.unbind(), 0)?; - } - } - } - } else { - for pair in iterable.bind(py).try_iter()? { - let (key, value) = - pair?.extract::<(pyo3::Py, pyo3::Py)>()?; - - let hk = PreHashObject::from_pyobject(py, key)?; - - match self.entry_with_slot(py, &hk)? 
{ - Entry::Occupied(entry) => { - entry.update(py, value)?; - } - Entry::Absent(entry) => { - entry.insert(py, hk, value, 0)?; - } - } - } - } - - Ok(()) - } - - pub fn iter(&mut self) -> LFUIterator { - self.heap.iter(|a, b| a.2.cmp(&b.2)) - } - - pub fn least_frequently_used(&mut self, n: usize) -> Option> { - self.heap.sort_by(|a, b| a.2.cmp(&b.2)); - let node = self.heap.get(n)?; - - Some(*node) - } - - #[allow(clippy::wrong_self_convention)] - pub fn from_pickle( - &mut self, - py: pyo3::Python<'_>, - state: *mut pyo3::ffi::PyObject, - ) -> pyo3::PyResult<()> { - use pyo3::types::PyAnyMethods; - - unsafe { - let (maxsize, iterable, capacity, maxmemory) = extract_pickle_tuple!(py, state => list); - - // SAFETY: we check `iterable` type in `extract_pickle_tuple` macro - if maxsize < (pyo3::ffi::PyObject_Size(iterable.as_ptr()) as usize) { - return Err(pyo3::PyErr::new::( - "iterable object size is greater than maxsize", - )); - } - - let mut new = Self::new(maxsize, capacity, maxmemory)?; - - for pair in iterable.bind(py).try_iter()? { - let (key, value, freq) = - pair?.extract::<(pyo3::Py, pyo3::Py, usize)>()?; - - let hk = PreHashObject::from_pyobject(py, key)?; - - match new.entry_with_slot(py, &hk)? 
{ - Entry::Absent(entry) => { - entry.insert(py, hk, value, freq)?; - } - _ => std::hint::unreachable_unchecked(), - } - } - - new.heap.sort_by(|a, b| a.2.cmp(&b.2)); - - *self = new; - Ok(()) - } - } -} - -impl LFUPolicyOccupied<'_> { - #[inline] - pub fn update( - self, - py: pyo3::Python<'_>, - value: pyo3::Py, - ) -> pyo3::PyResult> { - let item = unsafe { self.bucket.as_mut() }; - let (old_value, old_size, new_size) = { - let element = unsafe { item.as_mut() }; - let new_size = crate::common::entry_size(py, &element.0, &value)?; - - if new_size > self.instance.maxmemory.get() { - return Err(pyo3::PyErr::new::( - "The cache has reached the bound", - )); - } - - let old_size = element.3; - let old_value = std::mem::replace(&mut element.1, value); - element.3 = new_size; - element.2 += 1; - (old_value, old_size, new_size) - }; - - self.instance.heap.queue_sort(); - self.instance.memory = self - .instance - .memory - .saturating_sub(old_size) - .saturating_add(new_size); - - // In update we don't need to change this; because this does not change the memory address ranges - // self.instance.observed.change(); - - while self.instance.memory > self.instance.maxmemory.get() { - if self.instance.popitem().is_none() { - break; - } - } - - Ok(old_value) - } - - #[inline] - pub fn remove(self) -> TupleValue { - let (item, _) = unsafe { self.instance.table.remove(self.bucket) }; - let item = self.instance.heap.remove(item, |a, b| a.2.cmp(&b.2)); - - self.instance.memory = self.instance.memory.saturating_sub(item.3); - self.instance.observed.change(); - item - } - - pub fn into_value(self) -> NonNull { - let item = unsafe { self.bucket.as_mut() }; - *item - } -} - -impl LFUPolicyAbsent<'_> { - #[inline] - pub fn insert( - self, - py: pyo3::Python<'_>, - key: PreHashObject, - value: pyo3::Py, - freq: usize, - ) -> pyo3::PyResult<()> { - let entry_size = crate::common::entry_size(py, &key, &value)?; - if entry_size > self.instance.maxmemory.get() { - return 
Err(pyo3::PyErr::new::( - "The cache has reached the bound", - )); - } - - while self.instance.table.len() >= self.instance.maxsize.get() - || self.instance.memory.saturating_add(entry_size) > self.instance.maxmemory.get() - { - if self.instance.popitem().is_none() { - break; - } - } - - let hash = key.hash; - let node = self.instance.heap.push((key, value, freq, entry_size)); - - match self.insert_slot { - Some(slot) => unsafe { - self.instance.table.insert_in_slot(hash, slot, node); - }, - None => { - self.instance - .table - .insert(hash, node, |x| unsafe { x.as_ref().0.hash }); - } - } - - self.instance.memory = self.instance.memory.saturating_add(entry_size); - self.instance.observed.change(); - Ok(()) - } -} - -unsafe impl Send for LFUPolicy {} diff --git a/src/policies/lru.rs b/src/policies/lru.rs deleted file mode 100644 index 8a9dab5..0000000 --- a/src/policies/lru.rs +++ /dev/null @@ -1,420 +0,0 @@ -use crate::common::Entry; -use crate::common::Observed; -use crate::common::PreHashObject; -use crate::common::TryFindMethods; -use crate::linked_list; - -type NotNullNode = std::ptr::NonNull; - -pub struct LRUPolicy { - table: hashbrown::raw::RawTable, - list: linked_list::LinkedList, - maxsize: std::num::NonZeroUsize, - maxmemory: std::num::NonZeroUsize, - memory: usize, - pub observed: Observed, -} - -pub struct LRUPolicyOccupied<'a> { - instance: &'a mut LRUPolicy, - bucket: hashbrown::raw::Bucket, -} - -pub struct LRUPolicyAbsent<'a> { - instance: &'a mut LRUPolicy, - insert_slot: Option, -} - -impl LRUPolicy { - pub fn new(maxsize: usize, mut capacity: usize, maxmemory: usize) -> pyo3::PyResult { - let maxsize = non_zero_or!(maxsize, isize::MAX as usize); - let maxmemory = non_zero_or!(maxmemory, isize::MAX as usize); - capacity = capacity.min(maxsize.get()); - - Ok(Self { - table: new_table!(capacity)?, - list: linked_list::LinkedList::new(), - maxsize, - maxmemory, - memory: 0, - observed: Observed::new(), - }) - } - - pub fn maxsize(&self) -> usize { 
- self.maxsize.get() - } - - pub fn maxmemory(&self) -> usize { - self.maxmemory.get() - } - - pub fn memory(&self) -> usize { - self.memory - } - - #[inline] - pub fn len(&self) -> usize { - self.table.len() - } - - #[inline] - pub fn is_empty(&self) -> bool { - self.table.is_empty() - } - - pub fn is_full(&self) -> bool { - self.table.len() == self.maxsize.get() || self.memory >= self.maxmemory.get() - } - - pub fn capacity(&self) -> usize { - self.table.capacity() - } - - #[inline] - pub fn popitem(&mut self) -> Option<(PreHashObject, pyo3::Py, usize)> { - let ret = self.list.head?; - - unsafe { - self.table - .remove_entry((*ret.as_ptr()).element.0.hash, |node| { - core::ptr::eq(node.as_ptr(), ret.as_ptr()) - }) - .expect("popitem key not found."); - } - - self.observed.change(); - let item = self.list.pop_front().unwrap(); - self.memory = self.memory.saturating_sub(item.2); - Some(item) - } - - #[inline] - #[rustfmt::skip] - pub fn entry( - &'_ mut self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult, LRUPolicyAbsent<'_>>> { - match self - .table - .try_find(key.hash, |x| unsafe { x.as_ref().element.0.equal(py, key) })? - { - Some(bucket) => { - Ok( - Entry::Occupied(LRUPolicyOccupied { instance: self, bucket }) - ) - } - None => { - Ok( - Entry::Absent(LRUPolicyAbsent { instance: self, insert_slot: None }) - ) - }, - } - } - - #[inline] - #[rustfmt::skip] - pub fn entry_with_slot( - &'_ mut self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult, LRUPolicyAbsent<'_>>> { - match self - .table - .try_find_or_find_insert_slot( - key.hash, - |x| unsafe { x.as_ref().element.0.equal(py, key) }, - |x| unsafe { x.as_ref().element.0.hash } - )? 
{ - Ok(bucket) => { - Ok( - Entry::Occupied(LRUPolicyOccupied { instance: self, bucket }) - ) - } - Err(slot) => { - Ok( - Entry::Absent(LRUPolicyAbsent { instance: self, insert_slot: Some(slot) }) - ) - }, - } - } - - #[inline] - pub fn lookup( - &mut self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult>> { - match self.entry(py, key)? { - Entry::Occupied(x) => unsafe { - x.instance.list.move_back(*x.bucket.as_ptr()); - - Ok(Some(&x.bucket.as_ref().as_ref().element.1)) - }, - Entry::Absent(_) => Ok(None), - } - } - - pub fn peek( - &self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult>> { - let result = self - .table - .try_find(key.hash, |x| unsafe { x.as_ref().element.0.equal(py, key) })? - .map(|x| unsafe { &x.as_ref().as_ref().element.1 }); - - Ok(result) - } - - pub fn clear(&mut self) { - self.table.clear(); - self.list.clear(); - self.memory = 0; - self.observed.change(); - } - - pub fn shrink_to_fit(&mut self) { - self.table - .shrink_to(self.table.len(), |x| unsafe { x.as_ref().element.0.hash }); - - self.observed.change(); - } - - pub fn equal(&self, py: pyo3::Python<'_>, other: &Self) -> pyo3::PyResult { - if self.maxsize != other.maxsize { - return Ok(false); - } - - if self.maxmemory != other.maxmemory { - return Ok(false); - } - - if self.table.len() != other.table.len() { - return Ok(false); - } - - unsafe { - for node in self.table.iter().map(|x| x.as_ref()) { - let (key1, value1, _) = &node.as_ref().element; - - match other - .table - .try_find(key1.hash, |x| key1.equal(py, &x.as_ref().element.0))? - { - Some(bucket) => { - let (_, value2, _) = &bucket.as_ref().as_ref().element; - - if !crate::common::pyobject_equal(py, value1.as_ptr(), value2.as_ptr())? 
{ - return Ok(false); - } - } - None => return Ok(false), - } - } - } - - Ok(true) - } - - #[inline] - pub fn extend( - &mut self, - py: pyo3::Python<'_>, - iterable: pyo3::Py, - ) -> pyo3::PyResult<()> { - use pyo3::types::{PyAnyMethods, PyDictMethods}; - - if unsafe { pyo3::ffi::PyDict_CheckExact(iterable.as_ptr()) == 1 } { - let dict = unsafe { iterable.cast_bound_unchecked::(py) }; - - for (key, value) in dict.iter() { - let hk = - unsafe { PreHashObject::from_pyobject(py, key.unbind()).unwrap_unchecked() }; - - match self.entry_with_slot(py, &hk)? { - Entry::Occupied(entry) => { - entry.update(py, value.unbind())?; - } - Entry::Absent(entry) => { - entry.insert(py, hk, value.unbind())?; - } - } - } - } else { - for pair in iterable.bind(py).try_iter()? { - let (key, value) = - pair?.extract::<(pyo3::Py, pyo3::Py)>()?; - - let hk = PreHashObject::from_pyobject(py, key)?; - - match self.entry_with_slot(py, &hk)? { - Entry::Occupied(entry) => { - entry.update(py, value)?; - } - Entry::Absent(entry) => { - entry.insert(py, hk, value)?; - } - } - } - } - - Ok(()) - } - - pub fn iter(&self) -> linked_list::Iter { - self.list.iter() - } - - pub fn least_recently_used(&self) -> Option<&(PreHashObject, pyo3::Py, usize)> { - self.list.head.map(|x| unsafe { &x.as_ref().element }) - } - - pub fn most_recently_used(&self) -> Option<&(PreHashObject, pyo3::Py, usize)> { - self.list.tail.map(|x| unsafe { &x.as_ref().element }) - } - - #[allow(clippy::wrong_self_convention)] - pub fn from_pickle( - &mut self, - py: pyo3::Python<'_>, - state: *mut pyo3::ffi::PyObject, - ) -> pyo3::PyResult<()> { - use pyo3::types::PyAnyMethods; - unsafe { - let (maxsize, iterable, capacity, maxmemory) = extract_pickle_tuple!(py, state => list); - - let mut new = Self::new(maxsize, capacity, maxmemory)?; - - for pair in iterable.bind(py).try_iter()? 
{ - let (key, value) = - pair?.extract::<(pyo3::Py, pyo3::Py)>()?; - - let hk = PreHashObject::from_pyobject(py, key)?; - - match new.entry_with_slot(py, &hk)? { - Entry::Absent(entry) => { - entry.insert(py, hk, value)?; - } - _ => std::hint::unreachable_unchecked(), - } - } - - *self = new; - Ok(()) - } - } -} - -impl<'a> LRUPolicyOccupied<'a> { - #[inline] - pub fn update( - self, - py: pyo3::Python<'_>, - value: pyo3::Py, - ) -> pyo3::PyResult> { - let item = unsafe { self.bucket.as_mut() }; - let (old_value, old_size, new_size) = { - let element = unsafe { item.as_mut() }; - let new_size = crate::common::entry_size(py, &element.element.0, &value)?; - - if new_size > self.instance.maxmemory.get() { - return Err(pyo3::PyErr::new::( - "The cache has reached the bound", - )); - } - - let old_size = element.element.2; - let old_value = std::mem::replace(&mut element.element.1, value); - element.element.2 = new_size; - (old_value, old_size, new_size) - }; - - unsafe { - self.instance.list.move_back(*item); - } - self.instance.memory = self - .instance - .memory - .saturating_sub(old_size) - .saturating_add(new_size); - - // In update we don't need to change this; because this does not change the memory address ranges - // self.instance.observed.change(); - - while self.instance.memory > self.instance.maxmemory.get() { - if self.instance.popitem().is_none() { - break; - } - } - - Ok(old_value) - } - - #[inline] - pub fn remove(self) -> (PreHashObject, pyo3::Py, usize) { - // let (PreHashObject { hash, .. 
}, _) = &self.instance.entries[self.index - self.instance.n_shifts]; - let (item, _) = unsafe { self.instance.table.remove(self.bucket) }; - let item = unsafe { self.instance.list.remove(item) }; - - self.instance.memory = self.instance.memory.saturating_sub(item.2); - self.instance.observed.change(); - item - } - - pub fn into_value(self) -> &'a mut (PreHashObject, pyo3::Py, usize) { - unsafe { - self.instance.list.move_back(*self.bucket.as_ptr()); - } - - let item = unsafe { self.bucket.as_mut() }; - unsafe { &mut item.as_mut().element } - } -} - -impl LRUPolicyAbsent<'_> { - #[inline] - pub fn insert( - self, - py: pyo3::Python<'_>, - key: PreHashObject, - value: pyo3::Py, - ) -> pyo3::PyResult<()> { - let entry_size = crate::common::entry_size(py, &key, &value)?; - if entry_size > self.instance.maxmemory.get() { - return Err(pyo3::PyErr::new::( - "The cache has reached the bound", - )); - } - - while self.instance.table.len() >= self.instance.maxsize.get() - || self.instance.memory.saturating_add(entry_size) > self.instance.maxmemory.get() - { - if self.instance.popitem().is_none() { - break; - } - } - - let hash = key.hash; - let node = self.instance.list.push_back(key, value, entry_size); - - match self.insert_slot { - Some(slot) => unsafe { - self.instance.table.insert_in_slot(hash, slot, node); - }, - None => { - self.instance - .table - .insert(hash, node, |x| unsafe { x.as_ref().element.0.hash }); - } - } - - self.instance.memory = self.instance.memory.saturating_add(entry_size); - self.instance.observed.change(); - Ok(()) - } -} - -unsafe impl Send for LRUPolicy {} diff --git a/src/policies/mod.rs b/src/policies/mod.rs index 2945250..e69de29 100644 --- a/src/policies/mod.rs +++ b/src/policies/mod.rs @@ -1,7 +0,0 @@ -pub mod fifo; -pub mod lfu; -pub mod lru; -pub mod nopolicy; -pub mod random; -pub mod ttl; -pub mod vttl; diff --git a/src/policies/nopolicy.rs b/src/policies/nopolicy.rs index 3629f2b..e69de29 100644 --- a/src/policies/nopolicy.rs +++ 
b/src/policies/nopolicy.rs @@ -1,360 +0,0 @@ -use crate::common::Entry; -use crate::common::Observed; -use crate::common::PreHashObject; -use crate::common::TryFindMethods; - -pub struct NoPolicy { - table: hashbrown::raw::RawTable<(PreHashObject, pyo3::Py, usize)>, - maxsize: std::num::NonZeroUsize, - maxmemory: std::num::NonZeroUsize, - memory: usize, - pub observed: Observed, -} - -pub struct NoPolicyOccupied<'a> { - instance: &'a mut NoPolicy, - bucket: hashbrown::raw::Bucket<(PreHashObject, pyo3::Py, usize)>, -} - -pub struct NoPolicyAbsent<'a> { - instance: &'a mut NoPolicy, - insert_slot: Option, -} - -impl NoPolicy { - pub fn new(maxsize: usize, mut capacity: usize, maxmemory: usize) -> pyo3::PyResult { - let maxsize = non_zero_or!(maxsize, isize::MAX as usize); - let maxmemory = non_zero_or!(maxmemory, isize::MAX as usize); - capacity = capacity.min(maxsize.get()); - - Ok(Self { - table: new_table!(capacity)?, - maxsize, - maxmemory, - memory: 0, - observed: Observed::new(), - }) - } - - pub fn maxsize(&self) -> usize { - self.maxsize.get() - } - - pub fn maxmemory(&self) -> usize { - self.maxmemory.get() - } - - pub fn memory(&self) -> usize { - self.memory - } - - #[inline] - pub fn len(&self) -> usize { - self.table.len() - } - - #[inline] - pub fn is_empty(&self) -> bool { - self.table.is_empty() - } - - pub fn is_full(&self) -> bool { - self.table.len() == self.maxsize.get() || self.memory >= self.maxmemory.get() - } - - pub fn capacity(&self) -> usize { - self.table.capacity() - } - - pub fn iter(&self) -> hashbrown::raw::RawIter<(PreHashObject, pyo3::Py, usize)> { - unsafe { self.table.iter() } - } - - #[inline] - #[rustfmt::skip] - pub fn entry( - &'_ mut self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult, NoPolicyAbsent<'_>>> { - match self.table.try_find(key.hash, |(x, _, _)| x.equal(py, key))? 
{ - Some(bucket) => { - Ok( - Entry::Occupied(NoPolicyOccupied { instance: self, bucket }) - ) - }, - None => { - Ok( - Entry::Absent(NoPolicyAbsent { instance: self, insert_slot: None }) - ) - } - } - } - - #[inline] - #[rustfmt::skip] - pub fn entry_with_slot( - &'_ mut self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult, NoPolicyAbsent<'_>>> { - match self.table.try_find_or_find_insert_slot( - key.hash, - |(x, _, _)| x.equal(py, key), - |(x, _, _)| x.hash, - )? { - Ok(bucket) => Ok( - Entry::Occupied(NoPolicyOccupied { instance: self, bucket }) - ), - Err(insert_slot) => Ok( - Entry::Absent(NoPolicyAbsent { instance: self, insert_slot: Some(insert_slot) }) - ), - } - } - - #[inline] - pub fn lookup( - &self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult>> { - match self - .table - .try_find(key.hash, |(x, _, _)| x.equal(py, key))? - { - Some(x) => Ok(Some(unsafe { &x.as_ref().1 })), - None => Ok(None), - } - } - - pub fn equal(&self, py: pyo3::Python<'_>, other: &Self) -> pyo3::PyResult { - if self.maxsize != other.maxsize { - return Ok(false); - } - - if self.maxmemory != other.maxmemory { - return Ok(false); - } - - if self.table.len() != other.table.len() { - return Ok(false); - } - - let mut error = None; - - let result = unsafe { - self.table.iter().all(|bucket| { - let (key, val, _) = bucket.as_ref(); - - match other.table.try_find(key.hash, |(x, _, _)| x.equal(py, key)) { - Err(e) => { - error = Some(e); - true - } - Ok(Some(bucket)) => { - let (_, val2, _) = bucket.as_ref(); - - match crate::common::pyobject_equal(py, val.as_ptr(), val2.as_ptr()) { - Ok(result) => result, - Err(e) => { - error = Some(e); - true - } - } - } - Ok(None) => false, - } - }) - }; - - if let Some(error) = error { - return Err(error); - } - - Ok(result) - } - - pub fn clear(&mut self) { - self.table.clear(); - self.memory = 0; - self.observed.change(); - } - - pub fn shrink_to_fit(&mut self) { - self.table.shrink_to(self.table.len(), 
|(x, _, _)| x.hash); - self.observed.change(); - } - - #[inline] - pub fn extend( - &mut self, - py: pyo3::Python<'_>, - iterable: pyo3::Py, - ) -> pyo3::PyResult<()> { - use pyo3::types::{PyAnyMethods, PyDictMethods}; - - if unsafe { pyo3::ffi::PyDict_CheckExact(iterable.as_ptr()) == 1 } { - let dict = unsafe { iterable.cast_bound_unchecked::(py) }; - - for (key, value) in dict.iter() { - let hk = - unsafe { PreHashObject::from_pyobject(py, key.unbind()).unwrap_unchecked() }; - - match self.entry_with_slot(py, &hk)? { - Entry::Occupied(entry) => { - entry.update(py, value.unbind())?; - } - Entry::Absent(entry) => { - entry.insert(py, hk, value.unbind())?; - } - } - } - } else { - for pair in iterable.bind(py).try_iter()? { - let (key, value) = - pair?.extract::<(pyo3::Py, pyo3::Py)>()?; - - let hk = PreHashObject::from_pyobject(py, key)?; - - match self.entry_with_slot(py, &hk)? { - Entry::Occupied(entry) => { - entry.update(py, value)?; - } - Entry::Absent(entry) => { - entry.insert(py, hk, value)?; - } - } - } - } - - Ok(()) - } - - #[allow(clippy::wrong_self_convention)] - pub fn from_pickle( - &mut self, - py: pyo3::Python<'_>, - state: *mut pyo3::ffi::PyObject, - ) -> pyo3::PyResult<()> { - use pyo3::types::PyDictMethods; - - let (maxsize, iterable, capacity, maxmemory) = - unsafe { extract_pickle_tuple!(py, state => dict) }; - - let mut new = Self::new(maxsize, capacity, maxmemory)?; - - // SAFETY: we checked that the iterable is a dict in extract_pickle_tuple! macro - let dict = unsafe { iterable.cast_bound_unchecked::(py) }; - - unsafe { - for (key, value) in dict.iter() { - let hk = PreHashObject::from_pyobject(py, key.unbind()).unwrap_unchecked(); - - match new.entry_with_slot(py, &hk)? 
{ - Entry::Absent(entry) => { - entry.insert(py, hk, value.unbind())?; - } - _ => std::hint::unreachable_unchecked(), - } - } - } - - *self = new; - Ok(()) - } -} - -impl<'a> NoPolicyOccupied<'a> { - #[inline] - pub fn update( - self, - py: pyo3::Python<'_>, - value: pyo3::Py, - ) -> pyo3::PyResult> { - unsafe { - let item = self.bucket.as_mut(); - let new_size = crate::common::entry_size(py, &item.0, &value)?; - - if new_size > self.instance.maxmemory.get() { - return Err(pyo3::PyErr::new::( - "The cache has reached the bound", - )); - } - - let next_memory = self - .instance - .memory - .saturating_sub(item.2) - .saturating_add(new_size); - if next_memory > self.instance.maxmemory.get() { - return Err(pyo3::PyErr::new::( - "The cache has reached the bound", - )); - } - - // In update we don't need to change this; because this does not change the memory address ranges - // self.instance.observed.change(); - - let old_value = std::mem::replace(&mut item.1, value); - item.2 = new_size; - self.instance.memory = next_memory; - Ok(old_value) - } - } - - #[inline] - pub fn remove(self) -> (PreHashObject, pyo3::Py, usize) { - let (x, _) = unsafe { self.instance.table.remove(self.bucket) }; - self.instance.memory = self.instance.memory.saturating_sub(x.2); - self.instance.observed.change(); - x - } - - pub fn into_value(self) -> &'a mut (PreHashObject, pyo3::Py, usize) { - unsafe { self.bucket.as_mut() } - } -} - -impl NoPolicyAbsent<'_> { - #[inline] - pub fn insert( - self, - py: pyo3::Python<'_>, - key: PreHashObject, - value: pyo3::Py, - ) -> pyo3::PyResult<()> { - let entry_size = crate::common::entry_size(py, &key, &value)?; - - if entry_size > self.instance.maxmemory.get() - || self.instance.memory.saturating_add(entry_size) > self.instance.maxmemory.get() - { - return Err(pyo3::PyErr::new::( - "The cache has reached the bound", - )); - } - - if self.instance.table.len() >= self.instance.maxsize.get() { - // There's no algorithm for removing a key-value pair, so we 
raise PyOverflowError. - return Err(pyo3::PyErr::new::( - "The cache has reached the bound", - )); - } - - match self.insert_slot { - Some(slot) => unsafe { - self.instance - .table - .insert_in_slot(key.hash, slot, (key, value, entry_size)); - }, - None => { - self.instance - .table - .insert(key.hash, (key, value, entry_size), |(x, _, _)| x.hash); - } - } - - self.instance.memory = self.instance.memory.saturating_add(entry_size); - self.instance.observed.change(); - Ok(()) - } -} diff --git a/src/policies/random.rs b/src/policies/random.rs deleted file mode 100644 index 0803724..0000000 --- a/src/policies/random.rs +++ /dev/null @@ -1,391 +0,0 @@ -use crate::common::Entry; -use crate::common::Observed; -use crate::common::PreHashObject; -use crate::common::TryFindMethods; - -pub struct RandomPolicy { - table: hashbrown::raw::RawTable<(PreHashObject, pyo3::Py, usize)>, - maxsize: std::num::NonZeroUsize, - maxmemory: std::num::NonZeroUsize, - memory: usize, - pub observed: Observed, -} - -pub struct RandomPolicyOccupied<'a> { - instance: &'a mut RandomPolicy, - bucket: hashbrown::raw::Bucket<(PreHashObject, pyo3::Py, usize)>, -} - -pub struct RandomPolicyAbsent<'a> { - instance: &'a mut RandomPolicy, - insert_slot: Option, -} - -impl RandomPolicy { - pub fn new(maxsize: usize, mut capacity: usize, maxmemory: usize) -> pyo3::PyResult { - let maxsize = non_zero_or!(maxsize, isize::MAX as usize); - let maxmemory = non_zero_or!(maxmemory, isize::MAX as usize); - capacity = capacity.min(maxsize.get()); - - Ok(Self { - table: new_table!(capacity)?, - maxsize, - maxmemory, - memory: 0, - observed: Observed::new(), - }) - } - - pub fn maxsize(&self) -> usize { - self.maxsize.get() - } - - pub fn maxmemory(&self) -> usize { - self.maxmemory.get() - } - - pub fn memory(&self) -> usize { - self.memory - } - - #[inline] - pub fn len(&self) -> usize { - self.table.len() - } - - #[inline] - pub fn is_empty(&self) -> bool { - self.table.is_empty() - } - - pub fn is_full(&self) -> 
bool { - self.table.len() == self.maxsize.get() || self.memory >= self.maxmemory.get() - } - - pub fn capacity(&self) -> usize { - self.table.capacity() - } - - pub fn iter(&self) -> hashbrown::raw::RawIter<(PreHashObject, pyo3::Py, usize)> { - unsafe { self.table.iter() } - } - - #[inline] - pub fn popitem( - &mut self, - ) -> pyo3::PyResult, usize)>> { - if self.table.is_empty() { - Ok(None) - } else { - let nth = fastrand::usize(0..self.table.len()); - - let bucket = unsafe { self.table.iter().nth(nth).unwrap_unchecked() }; - let (x, _) = unsafe { self.table.remove(bucket) }; - self.memory = self.memory.saturating_sub(x.2); - - self.observed.change(); - Ok(Some(x)) - } - } - - #[inline] - #[rustfmt::skip] - pub fn entry( - &'_ mut self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult, RandomPolicyAbsent<'_>>> { - match self.table.try_find(key.hash, |(x, _, _)| x.equal(py, key))? { - Some(bucket) => { - Ok( - Entry::Occupied(RandomPolicyOccupied { instance: self, bucket }) - ) - }, - None => { - Ok( - Entry::Absent(RandomPolicyAbsent { instance: self, insert_slot: None }) - ) - } - } - } - - #[inline] - #[rustfmt::skip] - pub fn entry_with_slot( - &'_ mut self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult, RandomPolicyAbsent<'_>>> { - match self.table.try_find_or_find_insert_slot( - key.hash, - |(x, _, _)| x.equal(py, key), - |(x, _, _)| x.hash, - )? { - Ok(bucket) => Ok( - Entry::Occupied(RandomPolicyOccupied { instance: self, bucket }) - ), - Err(insert_slot) => Ok( - Entry::Absent(RandomPolicyAbsent { instance: self, insert_slot: Some(insert_slot) }) - ), - } - } - - #[inline] - pub fn lookup( - &self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult>> { - match self - .table - .try_find(key.hash, |(x, _, _)| x.equal(py, key))? 
- { - Some(x) => Ok(Some(unsafe { &x.as_ref().1 })), - None => Ok(None), - } - } - - pub fn equal(&self, py: pyo3::Python<'_>, other: &Self) -> pyo3::PyResult { - if self.maxsize != other.maxsize { - return Ok(false); - } - - if self.maxmemory != other.maxmemory { - return Ok(false); - } - - if self.table.len() != other.table.len() { - return Ok(false); - } - - let mut error = None; - - let result = unsafe { - self.table.iter().all(|bucket| { - let (key, val, _) = bucket.as_ref(); - - match other.table.try_find(key.hash, |(x, _, _)| x.equal(py, key)) { - Err(e) => { - error = Some(e); - true - } - Ok(Some(bucket)) => { - let (_, val2, _) = bucket.as_ref(); - - match crate::common::pyobject_equal(py, val.as_ptr(), val2.as_ptr()) { - Ok(result) => result, - Err(e) => { - error = Some(e); - true - } - } - } - Ok(None) => false, - } - }) - }; - - if let Some(error) = error { - return Err(error); - } - - Ok(result) - } - - pub fn clear(&mut self) { - self.table.clear(); - self.memory = 0; - self.observed.change(); - } - - pub fn shrink_to_fit(&mut self) { - self.table.shrink_to(self.table.len(), |(x, _, _)| x.hash); - self.observed.change(); - } - - #[inline] - pub fn extend( - &mut self, - py: pyo3::Python<'_>, - iterable: pyo3::Py, - ) -> pyo3::PyResult<()> { - use pyo3::types::{PyAnyMethods, PyDictMethods}; - - if unsafe { pyo3::ffi::PyDict_CheckExact(iterable.as_ptr()) == 1 } { - let dict = unsafe { iterable.cast_bound_unchecked::(py) }; - - for (key, value) in dict.iter() { - let hk = - unsafe { PreHashObject::from_pyobject(py, key.unbind()).unwrap_unchecked() }; - - match self.entry_with_slot(py, &hk)? { - Entry::Occupied(entry) => { - entry.update(py, value.unbind())?; - } - Entry::Absent(entry) => { - entry.insert(py, hk, value.unbind())?; - } - } - } - } else { - for pair in iterable.bind(py).try_iter()? 
{ - let (key, value) = - pair?.extract::<(pyo3::Py, pyo3::Py)>()?; - - let hk = PreHashObject::from_pyobject(py, key)?; - - match self.entry_with_slot(py, &hk)? { - Entry::Occupied(entry) => { - entry.update(py, value)?; - } - Entry::Absent(entry) => { - entry.insert(py, hk, value)?; - } - } - } - } - - Ok(()) - } - - #[allow(clippy::wrong_self_convention)] - pub fn from_pickle( - &mut self, - py: pyo3::Python<'_>, - state: *mut pyo3::ffi::PyObject, - ) -> pyo3::PyResult<()> { - use pyo3::types::PyDictMethods; - - let (maxsize, iterable, capacity, maxmemory) = - unsafe { extract_pickle_tuple!(py, state => dict) }; - - let mut new = Self::new(maxsize, capacity, maxmemory)?; - - // SAFETY: we checked that the iterable is a dict in extract_pickle_tuple! macro - let dict = unsafe { iterable.cast_bound_unchecked::(py) }; - - unsafe { - for (key, value) in dict.iter() { - let hk = PreHashObject::from_pyobject(py, key.unbind()).unwrap_unchecked(); - - match new.entry_with_slot(py, &hk)? { - Entry::Absent(entry) => { - entry.insert(py, hk, value.unbind())?; - } - _ => std::hint::unreachable_unchecked(), - } - } - } - - *self = new; - Ok(()) - } - - pub fn random_key(&self) -> Option<&PreHashObject> { - if self.table.is_empty() { - None - } else { - let nth = fastrand::usize(0..self.table.len()); - - let bucket = unsafe { self.table.iter().nth(nth).unwrap_unchecked() }; - let (key, _, _) = unsafe { bucket.as_ref() }; - - Some(key) - } - } -} - -impl<'a> RandomPolicyOccupied<'a> { - #[inline] - pub fn update( - self, - py: pyo3::Python<'_>, - value: pyo3::Py, - ) -> pyo3::PyResult> { - let old_value; - { - let item = unsafe { self.bucket.as_mut() }; - let new_size = crate::common::entry_size(py, &item.0, &value)?; - - if new_size > self.instance.maxmemory.get() { - return Err(pyo3::PyErr::new::( - "The cache has reached the bound", - )); - } - - let old_size = item.2; - old_value = std::mem::replace(&mut item.1, value); - item.2 = new_size; - self.instance.memory = self - 
.instance - .memory - .saturating_sub(old_size) - .saturating_add(new_size); - } - - // In update we don't need to change this; because this does not change the memory address ranges - // self.instance.observed.change(); - - while self.instance.memory > self.instance.maxmemory.get() { - if self.instance.popitem()?.is_none() { - break; - } - } - - Ok(old_value) - } - - #[inline] - pub fn remove(self) -> (PreHashObject, pyo3::Py, usize) { - let (x, _) = unsafe { self.instance.table.remove(self.bucket) }; - self.instance.memory = self.instance.memory.saturating_sub(x.2); - self.instance.observed.change(); - x - } - - pub fn into_value(self) -> &'a mut (PreHashObject, pyo3::Py, usize) { - unsafe { self.bucket.as_mut() } - } -} - -impl RandomPolicyAbsent<'_> { - #[inline] - pub fn insert( - self, - py: pyo3::Python<'_>, - key: PreHashObject, - value: pyo3::Py, - ) -> pyo3::PyResult<()> { - let entry_size = crate::common::entry_size(py, &key, &value)?; - if entry_size > self.instance.maxmemory.get() { - return Err(pyo3::PyErr::new::( - "The cache has reached the bound", - )); - } - - while self.instance.table.len() >= self.instance.maxsize.get() - || self.instance.memory.saturating_add(entry_size) > self.instance.maxmemory.get() - { - if self.instance.popitem()?.is_none() { - break; - } - } - - match self.insert_slot { - Some(slot) => unsafe { - self.instance - .table - .insert_in_slot(key.hash, slot, (key, value, entry_size)); - }, - None => { - self.instance - .table - .insert(key.hash, (key, value, entry_size), |(x, _, _)| x.hash); - } - } - - self.instance.memory = self.instance.memory.saturating_add(entry_size); - self.instance.observed.change(); - Ok(()) - } -} diff --git a/src/policies/ttl.rs b/src/policies/ttl.rs deleted file mode 100644 index ddaff20..0000000 --- a/src/policies/ttl.rs +++ /dev/null @@ -1,770 +0,0 @@ -use super::fifo::MAX_N_SHIFT; -use crate::common::AbsentSituation; -use crate::common::Entry; -use crate::common::NoLifetimeSliceIter; -use 
crate::common::Observed; -use crate::common::PreHashObject; -use crate::common::TimeToLivePair; -use crate::common::TryFindMethods; - -use std::collections::VecDeque; - -pub struct TTLPolicy { - // See FIFOPolicy to find out fields - table: hashbrown::raw::RawTable, - entries: VecDeque, - maxsize: core::num::NonZeroUsize, - maxmemory: core::num::NonZeroUsize, - memory: usize, - ttl: std::time::Duration, - n_shifts: usize, - pub observed: Observed, -} - -pub struct TTLPolicyOccupied<'a> { - instance: &'a mut TTLPolicy, - bucket: hashbrown::raw::Bucket, -} - -pub struct TTLPolicyAbsent<'a> { - instance: &'a mut TTLPolicy, - situation: AbsentSituation, -} - -pub struct TTLIterator { - first: NoLifetimeSliceIter, - second: NoLifetimeSliceIter, -} - -impl TTLPolicy { - pub fn new( - maxsize: usize, - mut capacity: usize, - secs: f64, - maxmemory: usize, - ) -> pyo3::PyResult { - let maxsize = non_zero_or!(maxsize, isize::MAX as usize); - let maxmemory = non_zero_or!(maxmemory, isize::MAX as usize); - capacity = capacity.min(maxsize.get()); - - Ok(Self { - table: new_table!(capacity)?, - entries: VecDeque::new(), - maxsize, - maxmemory, - memory: 0, - ttl: std::time::Duration::from_secs_f64(secs), - n_shifts: 0, - observed: Observed::new(), - }) - } - - pub fn maxsize(&self) -> usize { - self.maxsize.get() - } - - pub fn maxmemory(&self) -> usize { - self.maxmemory.get() - } - - pub fn memory(&self) -> usize { - self.memory - } - - pub fn ttl(&self) -> std::time::Duration { - self.ttl - } - - #[inline] - pub fn real_len(&self) -> usize { - let now = std::time::SystemTime::now(); - let mut c = 0usize; - - for item in &self.entries { - if !item.is_expired(now) { - break; - } - - c += 1; - } - - self.table.len() - c - } - - #[inline] - pub fn is_empty(&self) -> bool { - self.real_len() == 0 - } - - pub fn is_full(&self) -> bool { - self.real_len() == self.maxsize.get() || self.memory >= self.maxmemory.get() - } - - pub fn capacity(&self) -> (usize, usize) { - 
(self.table.capacity(), self.entries.capacity()) - } - - #[inline] - fn decrement_indexes(&mut self, start: usize, end: usize) { - if start <= 1 && end == self.entries.len() && self.n_shifts < MAX_N_SHIFT { - self.n_shifts += 1; - return; - } - - if (end - start) > self.table.buckets() / 2 { - unsafe { - for bucket in self.table.iter() { - let i = bucket.as_mut(); - if start <= (*i) - self.n_shifts && (*i) - self.n_shifts < end { - *i -= 1; - } - } - } - } else { - let shifted = self.entries.range(start..end); - for (i, entry) in (start..end).zip(shifted) { - let old = self - .table - .get_mut(entry.key.hash, |x| (*x) - self.n_shifts == i) - .expect("index not found"); - - *old -= 1; - } - } - } - - #[inline] - pub fn expire(&mut self, py: pyo3::Python<'_>) { - let now = std::time::SystemTime::now(); - - while let Some(e) = self.entries.front() { - if !e.is_expired(now) { - break; - } - - unsafe { - self.popitem(py).unwrap_unchecked(); - } - } - } - - #[inline] - pub fn popitem(&mut self, py: pyo3::Python<'_>) -> pyo3::PyResult> { - let ret = self.entries.front(); - if ret.is_none() { - return Ok(None); - } - - let ret = unsafe { ret.unwrap_unchecked() }; - - match self.table.try_find(ret.key.hash, |x| { - self.entries[(*x) - self.n_shifts].key.equal(py, &ret.key) - })? { - Some(bucket) => { - unsafe { self.table.remove(bucket) }; - } - None => unreachable!("popitem key not found in table"), - } - - let ret = unsafe { self.entries.pop_front().unwrap_unchecked() }; - self.memory = self.memory.saturating_sub(ret.size); - - self.observed.change(); - - self.decrement_indexes(1, self.entries.len()); - Ok(Some(ret)) - } - - #[inline] - #[rustfmt::skip] - pub fn entry( - &'_ mut self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult, TTLPolicyAbsent<'_>>> { - match self - .table - .try_find(key.hash, |x| self.entries[(*x) - self.n_shifts].key.equal(py, key))? 
- { - Some(bucket) => { - let pair = &self.entries[unsafe { *bucket.as_ptr() } - self.n_shifts]; - - if !pair.is_expired(std::time::SystemTime::now()) { - Ok(Entry::Occupied(TTLPolicyOccupied { instance: self, bucket })) - } else { - Ok(Entry::Absent(TTLPolicyAbsent { instance: self, situation: AbsentSituation::Expired(bucket) })) - } - } - None => { - Ok( - Entry::Absent(TTLPolicyAbsent { instance: self, situation: AbsentSituation::None }) - ) - }, - } - } - - #[inline] - #[rustfmt::skip] - pub fn entry_with_slot( - &mut self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult, TTLPolicyAbsent<'_>>> { - match self.table.try_find_or_find_insert_slot( - key.hash, - |x| self.entries[(*x) - self.n_shifts].key.equal(py, key), - |x| self.entries[(*x) - self.n_shifts].key.hash, - )? { - Ok(bucket) => { - let pair = &self.entries[unsafe { *bucket.as_ptr() } - self.n_shifts]; - - if !pair.is_expired(std::time::SystemTime::now()) { - Ok(Entry::Occupied(TTLPolicyOccupied { instance: self, bucket })) - } else { - Ok(Entry::Absent(TTLPolicyAbsent { instance: self, situation: AbsentSituation::Expired(bucket) })) - } - }, - Err(insert_slot) => { - Ok( - Entry::Absent(TTLPolicyAbsent { instance: self, situation: AbsentSituation::Slot(insert_slot) }) - ) - }, - } - } - - #[inline] - pub fn lookup( - &self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult> { - match self - .table - .try_find(key.hash, |x| { - self.entries[(*x) - self.n_shifts].key.equal(py, key) - })? 
- .map(|bucket| unsafe { bucket.as_ref() }) - { - Some(index) => { - let pair = &self.entries[(*index) - self.n_shifts]; - - if !pair.is_expired(std::time::SystemTime::now()) { - Ok(Some(pair)) - } else { - Ok(None) - } - } - None => Ok(None), - } - } - - pub fn clear(&mut self) { - self.table.clear(); - self.entries.clear(); - self.n_shifts = 0; - self.memory = 0; - self.observed.change(); - } - - pub fn shrink_to_fit(&mut self, py: pyo3::Python<'_>) { - self.expire(py); - - self.table.shrink_to(self.table.len(), |x| { - self.entries[(*x) - self.n_shifts].key.hash - }); - self.entries.shrink_to_fit(); - self.observed.change(); - } - - #[inline] - pub fn extend( - &mut self, - py: pyo3::Python<'_>, - iterable: pyo3::Py, - ) -> pyo3::PyResult<()> { - use pyo3::types::{PyAnyMethods, PyDictMethods}; - - if unsafe { pyo3::ffi::PyDict_CheckExact(iterable.as_ptr()) == 1 } { - let dict = unsafe { iterable.cast_bound_unchecked::(py) }; - - for (key, value) in dict.iter() { - let hk = - unsafe { PreHashObject::from_pyobject(py, key.unbind()).unwrap_unchecked() }; - - match self.entry_with_slot(py, &hk)? { - Entry::Occupied(entry) => { - entry.update(py, value.unbind())?; - } - Entry::Absent(entry) => { - entry.insert(py, hk, value.unbind())?; - } - } - } - } else { - for pair in iterable.bind(py).try_iter()? { - let (key, value) = - pair?.extract::<(pyo3::Py, pyo3::Py)>()?; - - let hk = PreHashObject::from_pyobject(py, key)?; - - match self.entry_with_slot(py, &hk)? 
{ - Entry::Occupied(entry) => { - entry.update(py, value)?; - } - Entry::Absent(entry) => { - entry.insert(py, hk, value)?; - } - } - } - } - - Ok(()) - } - - pub fn entries_iter(&self) -> std::collections::vec_deque::Iter<'_, TimeToLivePair> { - self.entries.iter() - } - - pub fn equal(&self, py: pyo3::Python<'_>, other: &Self) -> pyo3::PyResult { - if self.maxsize != other.maxsize { - return Ok(false); - } - - if self.maxmemory != other.maxmemory { - return Ok(false); - } - - if self.real_len() != other.real_len() { - return Ok(false); - } - - let now = std::time::SystemTime::now(); - - unsafe { - for index1 in self.table.iter().map(|x| x.as_ref()) { - let pair1 = &self.entries[(*index1) - self.n_shifts]; - - if pair1.is_expired(now) { - continue; - } - - match other.table.try_find(pair1.key.hash, |x| { - pair1 - .key - .equal(py, &other.entries[(*x) - other.n_shifts].key) - })? { - Some(bucket) => { - let pair2 = &other.entries[(*bucket.as_ref()) - other.n_shifts]; - - if pair2.is_expired(now) { - return Ok(false); - } - - if !crate::common::pyobject_equal( - py, - pair1.value.as_ptr(), - pair2.value.as_ptr(), - )? 
{ - return Ok(false); - } - } - None => return Ok(false), - } - } - } - - Ok(true) - } - - pub fn iter(&mut self, py: pyo3::Python<'_>) -> TTLIterator { - self.expire(py); - - let (a, b) = self.entries.as_slices(); - - TTLIterator { - first: NoLifetimeSliceIter::new(a), - second: NoLifetimeSliceIter::new(b), - } - } - - pub fn get_index(&self, n: usize) -> Option<&TimeToLivePair> { - self.entries.get(n) - } - - #[allow(clippy::wrong_self_convention)] - pub fn from_pickle( - &mut self, - py: pyo3::Python<'_>, - state: *mut pyo3::ffi::PyObject, - ) -> pyo3::PyResult<()> { - use pyo3::types::PyAnyMethods; - - unsafe { - if pyo3::ffi::PyTuple_CheckExact(state) == 0 { - return Err(pyo3::PyErr::new::( - "expected tuple, but got another type", - )); - } - - let size = pyo3::ffi::PyTuple_Size(state); - if size != 4 && size != 5 { - return Err(pyo3::PyErr::new::( - "tuple size is invalid", - )); - } - - let maxsize = { - let obj = pyo3::ffi::PyTuple_GetItem(state, 0); - pyo3::ffi::PyLong_AsSize_t(obj) - }; - - if let Some(e) = pyo3::PyErr::take(py) { - return Err(e); - } - - let iterable = { - let obj = pyo3::ffi::PyTuple_GetItem(state, 1); - - if pyo3::ffi::PyList_CheckExact(obj) != 1 { - return Err(pyo3::PyErr::new::( - "the iterable object is not an dict or list", - )); - } - - pyo3::Py::::from_borrowed_ptr(py, obj) - }; - - let capacity = { - let obj = pyo3::ffi::PyTuple_GetItem(state, 2); - pyo3::ffi::PyLong_AsSize_t(obj) - }; - - if let Some(e) = pyo3::PyErr::take(py) { - return Err(e); - } - - // SAFETY: we check `iterable` type in this function - if maxsize < (pyo3::ffi::PyObject_Size(iterable.as_ptr()) as usize) { - return Err(pyo3::PyErr::new::( - "the iterable object size is more than maxsize!", - )); - } - - let ttl = { - let obj = pyo3::ffi::PyTuple_GetItem(state, 3); - pyo3::ffi::PyFloat_AsDouble(obj) - }; - - if let Some(e) = pyo3::PyErr::take(py) { - return Err(e); - } - - let maxmemory = if size == 5 { - let obj = pyo3::ffi::PyTuple_GetItem(state, 4); - let 
result = pyo3::ffi::PyLong_AsSize_t(obj); - - if let Some(e) = pyo3::PyErr::take(py) { - return Err(e); - } - - result - } else { - 0 - }; - - let mut new = Self::new(maxsize, capacity, ttl, maxmemory)?; - - for pair in iterable.bind(py).try_iter()? { - let (key, value, timestamp) = - pair?.extract::<(pyo3::Py, pyo3::Py, f64)>()?; - - let hk = PreHashObject::from_pyobject(py, key)?; - - match new.entry_with_slot(py, &hk)? { - Entry::Absent(entry) => { - entry.pickle_insert( - py, - hk, - value, - std::time::UNIX_EPOCH + std::time::Duration::from_secs_f64(timestamp), - )?; - } - _ => std::hint::unreachable_unchecked(), - } - } - - new.expire(py); - new.shrink_to_fit(py); - - *self = new; - Ok(()) - } - } -} - -impl<'a> TTLPolicyOccupied<'a> { - #[inline] - pub fn update( - self, - py: pyo3::Python<'_>, - value: pyo3::Py, - ) -> pyo3::PyResult> { - let new_size = { - let index = unsafe { *self.bucket.as_ref() } - self.instance.n_shifts; - let item = &self.instance.entries[index]; - crate::common::entry_size(py, &item.key, &value)? 
- }; - - if new_size > self.instance.maxmemory.get() { - return Err(pyo3::PyErr::new::( - "The cache has reached the bound", - )); - } - - // We have to move the value to the end of the vector - let (mut index, slot) = unsafe { self.instance.table.remove(self.bucket.clone()) }; - index -= self.instance.n_shifts; - - self.instance - .decrement_indexes(index + 1, self.instance.entries.len()); - - let mut item = self.instance.entries.remove(index).unwrap(); - let old_size = item.size; - item.expire_at = Some(std::time::SystemTime::now() + self.instance.ttl); - let old_value = std::mem::replace(&mut item.value, value); - item.size = new_size; - self.instance.memory = self - .instance - .memory - .saturating_sub(old_size) - .saturating_add(new_size); - - unsafe { - self.instance.table.insert_in_slot( - item.key.hash, - slot, - self.instance.entries.len() + self.instance.n_shifts, - ); - - self.instance.entries.push_back(item); - } - - self.instance.observed.change(); - - while self.instance.memory > self.instance.maxmemory.get() { - if self.instance.popitem(py)?.is_none() { - break; - } - } - - Ok(old_value) - } - - #[inline] - pub fn remove(self) -> TimeToLivePair { - let (mut index, _) = unsafe { self.instance.table.remove(self.bucket) }; - index -= self.instance.n_shifts; - - self.instance - .decrement_indexes(index + 1, self.instance.entries.len()); - - let m = self.instance.entries.remove(index).unwrap(); - self.instance.memory = self.instance.memory.saturating_sub(m.size); - - self.instance.observed.change(); - m - } - - pub fn into_value(self) -> &'a mut TimeToLivePair { - let index = unsafe { self.bucket.as_ref() }; - &mut self.instance.entries[index - self.instance.n_shifts] - } -} - -impl TTLPolicyAbsent<'_> { - unsafe fn pickle_insert( - self, - py: pyo3::Python<'_>, - key: PreHashObject, - value: pyo3::Py, - expire_at: std::time::SystemTime, - ) -> pyo3::PyResult<()> { - let entry_size = crate::common::entry_size(py, &key, &value)?; - if entry_size > 
self.instance.maxmemory.get() - || self.instance.memory.saturating_add(entry_size) > self.instance.maxmemory.get() - { - return Err(pyo3::PyErr::new::( - "The cache has reached the bound", - )); - } - - match self.situation { - AbsentSituation::Expired(_) => { - return Err(pyo3::PyErr::new::( - "pikcle object is suspicious!", - )) - } - AbsentSituation::Slot(slot) => unsafe { - // This means the key is not available and we have insert_slot - // for inserting it - - // We don't need to check maxsize, we sure `len(iterable) <= maxsize` in loading pickle - - self.instance.table.insert_in_slot( - key.hash, - slot, - self.instance.entries.len() + self.instance.n_shifts, - ); - - self.instance.entries.push_back(TimeToLivePair::new( - key, - value, - Some(expire_at), - entry_size, - )); - }, - AbsentSituation::None => unsafe { std::hint::unreachable_unchecked() }, - } - - self.instance.memory = self.instance.memory.saturating_add(entry_size); - Ok(()) - } - - #[inline] - pub fn insert( - self, - py: pyo3::Python<'_>, - key: PreHashObject, - value: pyo3::Py, - ) -> pyo3::PyResult<()> { - let expire_at = std::time::SystemTime::now() + self.instance.ttl; - let entry_size = crate::common::entry_size(py, &key, &value)?; - - if entry_size > self.instance.maxmemory.get() { - return Err(pyo3::PyErr::new::( - "The cache has reached the bound", - )); - } - - match self.situation { - AbsentSituation::Expired(bucket) => { - // This means the key is available but expired - // So we have to move the value to the end of the vector - // and update the bucket ( like TTLPolicyOccupied::update ) - let (mut index, slot) = unsafe { self.instance.table.remove(bucket) }; - index -= self.instance.n_shifts; - - self.instance - .decrement_indexes(index + 1, self.instance.entries.len()); - - let mut item = self.instance.entries.remove(index).unwrap(); - let old_size = item.size; - - item.expire_at = Some(expire_at); - item.value = value; - item.size = entry_size; - self.instance.memory = self - 
.instance - .memory - .saturating_sub(old_size) - .saturating_add(entry_size); - - unsafe { - self.instance.table.insert_in_slot( - item.key.hash, - slot, - self.instance.entries.len() + self.instance.n_shifts, - ); - - self.instance.entries.push_back(item); - } - - while self.instance.memory > self.instance.maxmemory.get() { - if self.instance.popitem(py)?.is_none() { - break; - } - } - } - AbsentSituation::Slot(slot) => unsafe { - // This means the key is not available and we have insert_slot - // for inserting it - - self.instance.expire(py); // Remove expired pairs to make room for the new pair - - while self.instance.table.len() >= self.instance.maxsize.get() - || self.instance.memory.saturating_add(entry_size) - > self.instance.maxmemory.get() - { - if self.instance.popitem(py)?.is_none() { - break; - } - } - - self.instance.table.insert_in_slot( - key.hash, - slot, - self.instance.entries.len() + self.instance.n_shifts, - ); - - self.instance.entries.push_back(TimeToLivePair::new( - key, - value, - Some(expire_at), - entry_size, - )); - self.instance.memory = self.instance.memory.saturating_add(entry_size); - }, - AbsentSituation::None => { - // This is same as AbsentSituation::Slot but we don't have any slot - - self.instance.expire(py); // Remove expired pairs to make room for the new pair - - while self.instance.table.len() >= self.instance.maxsize.get() - || self.instance.memory.saturating_add(entry_size) - > self.instance.maxmemory.get() - { - if self.instance.popitem(py)?.is_none() { - break; - } - } - - self.instance.table.insert( - key.hash, - self.instance.entries.len() + self.instance.n_shifts, - |index| { - self.instance.entries[(*index) - self.instance.n_shifts] - .key - .hash - }, - ); - - self.instance.entries.push_back(TimeToLivePair::new( - key, - value, - Some(expire_at), - entry_size, - )); - self.instance.memory = self.instance.memory.saturating_add(entry_size); - } - } - - self.instance.observed.change(); - Ok(()) - } -} - -impl Iterator 
for TTLIterator { - type Item = std::ptr::NonNull; - - fn next(&mut self) -> Option { - match self.first.next() { - Some(val) => Some(val), - None => { - core::mem::swap(&mut self.first, &mut self.second); - self.first.next() - } - } - } -} - -unsafe impl Send for TTLIterator {} diff --git a/src/policies/vttl.rs b/src/policies/vttl.rs deleted file mode 100644 index 199ec58..0000000 --- a/src/policies/vttl.rs +++ /dev/null @@ -1,597 +0,0 @@ -use crate::common::AbsentSituation; -use crate::common::Entry; -use crate::common::Observed; -use crate::common::PreHashObject; -use crate::common::TimeToLivePair; -use crate::common::TryFindMethods; -use crate::lazyheap; - -use std::ptr::NonNull; - -macro_rules! compare_fn { - () => { - |a, b| { - if a.expire_at.is_none() && b.expire_at.is_none() { - return std::cmp::Ordering::Equal; - } else if b.expire_at.is_none() { - return std::cmp::Ordering::Less; - } else if a.expire_at.is_none() { - return std::cmp::Ordering::Greater; - } - - a.expire_at.cmp(&b.expire_at) - } - }; -} - -pub struct VTTLPolicy { - table: hashbrown::raw::RawTable>, - heap: lazyheap::LazyHeap, - maxsize: std::num::NonZeroUsize, - maxmemory: std::num::NonZeroUsize, - memory: usize, - pub observed: Observed, -} - -pub struct VTTLPolicyOccupied<'a> { - instance: &'a mut VTTLPolicy, - bucket: hashbrown::raw::Bucket>, -} - -pub struct VTTLPolicyAbsent<'a> { - instance: &'a mut VTTLPolicy, - situation: AbsentSituation>, -} - -pub type VTTLIterator = lazyheap::Iter; - -impl VTTLPolicy { - pub fn new(maxsize: usize, mut capacity: usize, maxmemory: usize) -> pyo3::PyResult { - let maxsize = non_zero_or!(maxsize, isize::MAX as usize); - let maxmemory = non_zero_or!(maxmemory, isize::MAX as usize); - capacity = capacity.min(maxsize.get()); - - Ok(Self { - table: new_table!(capacity)?, - heap: lazyheap::LazyHeap::new(), - maxsize, - maxmemory, - memory: 0, - observed: Observed::new(), - }) - } - - pub fn maxsize(&self) -> usize { - self.maxsize.get() - } - - pub fn 
maxmemory(&self) -> usize { - self.maxmemory.get() - } - - pub fn memory(&self) -> usize { - self.memory - } - - #[inline] - pub fn real_len(&mut self) -> usize { - self.expire(); - self.table.len() - } - - #[inline] - pub fn is_empty(&self) -> bool { - self.table.is_empty() - } - - pub fn is_full(&self) -> bool { - self.table.len() == self.maxsize.get() || self.memory >= self.maxmemory.get() - } - - pub fn capacity(&self) -> usize { - self.table.capacity() - } - - #[inline] - pub fn expire(&mut self) { - self.heap.sort_by(compare_fn!()); - - let now = std::time::SystemTime::now(); - - while let Some(x) = self.heap.front() { - if unsafe { !x.as_ref().is_expired(now) } { - break; - } - - unsafe { - self.table - .remove_entry(x.as_ref().key.hash, |x| { - std::ptr::eq(x.as_ptr(), x.as_ptr()) - }) - .unwrap(); - } - - let removed = self.heap.pop_front(compare_fn!()); - if let Some(pair) = removed { - self.memory = self.memory.saturating_sub(pair.size); - } - self.observed.change(); - } - } - - #[inline] - pub fn popitem(&mut self) -> Option { - self.heap.sort_by(compare_fn!()); - - let front = self.heap.front()?; - - unsafe { - self.table - .remove_entry(front.as_ref().key.hash, |x| { - std::ptr::eq(x.as_ptr(), front.as_ptr()) - }) - .unwrap(); - } - - self.observed.change(); - let item = self.heap.pop_front(compare_fn!()).unwrap(); - self.memory = self.memory.saturating_sub(item.size); - Some(item) - } - - #[inline] - #[rustfmt::skip] - pub fn entry( - &'_ mut self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult, VTTLPolicyAbsent<'_>>> { - match self - .table - .try_find(key.hash, |ptr| unsafe { ptr.as_ref().key.equal(py, key) })? 
- { - Some(bucket) => unsafe { - let pair = bucket.as_ref(); - - if !pair.as_ref().is_expired(std::time::SystemTime::now()) { - Ok(Entry::Occupied(VTTLPolicyOccupied { instance: self, bucket })) - } else { - Ok(Entry::Absent(VTTLPolicyAbsent { instance: self, situation: AbsentSituation::Expired(bucket) })) - } - } - None => { - Ok( - Entry::Absent(VTTLPolicyAbsent { instance: self, situation: AbsentSituation::None }) - ) - }, - } - } - - #[inline] - #[rustfmt::skip] - pub fn entry_with_slot( - &'_ mut self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult, VTTLPolicyAbsent<'_>>> { - match self - .table - .try_find_or_find_insert_slot( - key.hash, - |ptr| unsafe { ptr.as_ref().key.equal(py, key) }, - |ptr| unsafe { ptr.as_ref().key.hash }, - )? { - Ok(bucket) => unsafe { - let pair = bucket.as_ref(); - - if !pair.as_ref().is_expired(std::time::SystemTime::now()) { - Ok(Entry::Occupied(VTTLPolicyOccupied { instance: self, bucket })) - } else { - Ok(Entry::Absent(VTTLPolicyAbsent { instance: self, situation: AbsentSituation::Expired(bucket) })) - } - } - Err(slot) => { - Ok( - Entry::Absent(VTTLPolicyAbsent { instance: self, situation: AbsentSituation::Slot(slot) }) - ) - }, - } - } - - #[inline] - pub fn lookup( - &self, - py: pyo3::Python<'_>, - key: &PreHashObject, - ) -> pyo3::PyResult> { - match self - .table - .try_find(key.hash, |ptr| unsafe { ptr.as_ref().key.equal(py, key) })? 
- .map(|bucket| unsafe { bucket.as_ref() }) - { - Some(pair) => unsafe { - if !pair.as_ref().is_expired(std::time::SystemTime::now()) { - Ok(Some(pair.as_ref())) - } else { - Ok(None) - } - }, - None => Ok(None), - } - } - - pub fn clear(&mut self) { - self.table.clear(); - self.heap.clear(); - self.memory = 0; - self.observed.change(); - } - - pub fn shrink_to_fit(&mut self) { - self.table - .shrink_to(self.table.len(), |x| unsafe { x.as_ref().key.hash }); - - self.heap.shrink_to_fit(); - self.observed.change(); - } - - pub fn iter(&mut self) -> VTTLIterator { - self.heap.iter(compare_fn!()) - } - - pub fn equal(&mut self, py: pyo3::Python<'_>, other: &mut Self) -> pyo3::PyResult { - if self.maxsize != other.maxsize { - return Ok(false); - } - - if self.maxmemory != other.maxmemory { - return Ok(false); - } - - if self.real_len() != other.real_len() { - return Ok(false); - } - - unsafe { - for node in self.table.iter().map(|x| x.as_ref()) { - let pair1 = node.as_ref(); - - // NOTE: there's no need to check if the pair is expired - // because we already expired all expired pairs by using real_len method - - match other - .table - .try_find(pair1.key.hash, |x| pair1.key.equal(py, &x.as_ref().key))? - { - Some(bucket) => { - let pair2 = bucket.as_ref().as_ref(); - - if !crate::common::pyobject_equal( - py, - pair1.value.as_ptr(), - pair2.value.as_ptr(), - )? { - return Ok(false); - } - } - None => return Ok(false), - } - } - } - - Ok(true) - } - - #[inline] - pub fn extend( - &mut self, - py: pyo3::Python<'_>, - iterable: pyo3::Py, - ttl: Option, - ) -> pyo3::PyResult<()> { - use pyo3::types::{PyAnyMethods, PyDictMethods}; - - if unsafe { pyo3::ffi::PyDict_CheckExact(iterable.as_ptr()) == 1 } { - let dict = unsafe { iterable.cast_bound_unchecked::(py) }; - - for (key, value) in dict.iter() { - let hk = - unsafe { PreHashObject::from_pyobject(py, key.unbind()).unwrap_unchecked() }; - - match self.entry_with_slot(py, &hk)? 
{ - Entry::Occupied(entry) => { - entry.update(py, value.unbind(), ttl)?; - } - Entry::Absent(entry) => { - entry.insert(py, hk, value.unbind(), ttl)?; - } - } - } - } else { - for pair in iterable.bind(py).try_iter()? { - let (key, value) = - pair?.extract::<(pyo3::Py, pyo3::Py)>()?; - - let hk = PreHashObject::from_pyobject(py, key)?; - - match self.entry_with_slot(py, &hk)? { - Entry::Occupied(entry) => { - entry.update(py, value, ttl)?; - } - Entry::Absent(entry) => { - entry.insert(py, hk, value, ttl)?; - } - } - } - } - - Ok(()) - } - - #[allow(clippy::wrong_self_convention)] - pub fn from_pickle( - &mut self, - py: pyo3::Python<'_>, - state: *mut pyo3::ffi::PyObject, - ) -> pyo3::PyResult<()> { - use pyo3::types::PyAnyMethods; - - unsafe { - let (maxsize, iterable, capacity, maxmemory) = extract_pickle_tuple!(py, state => list); - - // SAFETY: we check `iterable` type in `extract_pickle_tuple` macro - if maxsize < (pyo3::ffi::PyObject_Size(iterable.as_ptr()) as usize) { - return Err(pyo3::PyErr::new::( - "iterable object size is greater than maxsize", - )); - } - - let mut new = Self::new(maxsize, capacity, maxmemory)?; - - for pair in iterable.bind(py).try_iter()? { - let (key, value, timestamp) = - pair?.extract::<(pyo3::Py, pyo3::Py, f64)>()?; - - let hk = PreHashObject::from_pyobject(py, key)?; - - let ttl = { - if timestamp == 0.0 { - None - } else { - Some(std::time::UNIX_EPOCH + std::time::Duration::from_secs_f64(timestamp)) - } - }; - - match new.entry_with_slot(py, &hk)? 
{ - Entry::Absent(entry) => { - entry.pickle_insert(py, hk, value, ttl)?; - } - _ => std::hint::unreachable_unchecked(), - } - } - - new.expire(); - new.shrink_to_fit(); - - *self = new; - Ok(()) - } - } -} - -impl VTTLPolicyOccupied<'_> { - #[inline] - pub fn update( - self, - py: pyo3::Python<'_>, - value: pyo3::Py, - ttl: Option, - ) -> pyo3::PyResult> { - let old_value; - { - let item = unsafe { self.bucket.as_mut() }; - let pair = unsafe { item.as_mut() }; - let new_size = crate::common::entry_size(py, &pair.key, &value)?; - - if new_size > self.instance.maxmemory.get() { - return Err(pyo3::PyErr::new::( - "The cache has reached the bound", - )); - } - - let old_size = pair.size; - old_value = std::mem::replace(&mut pair.value, value); - pair.size = new_size; - pair.expire_at = - ttl.map(|x| std::time::SystemTime::now() + std::time::Duration::from_secs_f64(x)); - self.instance.memory = self - .instance - .memory - .saturating_sub(old_size) - .saturating_add(new_size); - } - self.instance.heap.queue_sort(); - - // In update we don't need to change this; because this does not change the memory address ranges - // self.instance.observed.change(); - - while self.instance.memory > self.instance.maxmemory.get() { - if self.instance.popitem().is_none() { - break; - } - } - - Ok(old_value) - } - - #[inline] - pub fn remove(self) -> TimeToLivePair { - let (item, _) = unsafe { self.instance.table.remove(self.bucket) }; - let item = self.instance.heap.remove(item, compare_fn!()); - - self.instance.memory = self.instance.memory.saturating_sub(item.size); - self.instance.observed.change(); - item - } - - pub fn into_value(self) -> NonNull { - let item = unsafe { self.bucket.as_mut() }; - *item - } -} - -impl VTTLPolicyAbsent<'_> { - unsafe fn pickle_insert( - self, - py: pyo3::Python<'_>, - key: PreHashObject, - value: pyo3::Py, - expire_at: Option, - ) -> pyo3::PyResult<()> { - let entry_size = crate::common::entry_size(py, &key, &value)?; - if entry_size > 
self.instance.maxmemory.get() - || self.instance.memory.saturating_add(entry_size) > self.instance.maxmemory.get() - { - return Err(pyo3::PyErr::new::( - "The cache has reached the bound", - )); - } - - match self.situation { - AbsentSituation::Expired(_) => { - return Err(pyo3::PyErr::new::( - "pikcle object is suspicious!", - )) - } - AbsentSituation::Slot(slot) => { - // This means the key is not available and we have insert_slot - // for inserting it - - // We don't need to check maxsize, we sure `len(iterable) <= maxsize` in loading pickle - - let hash = key.hash; - let node = self - .instance - .heap - .push(TimeToLivePair::new(key, value, expire_at, entry_size)); - - unsafe { - self.instance.table.insert_in_slot(hash, slot, node); - } - } - AbsentSituation::None => unsafe { std::hint::unreachable_unchecked() }, - } - - self.instance.memory = self.instance.memory.saturating_add(entry_size); - Ok(()) - } - - #[inline] - pub fn insert( - self, - py: pyo3::Python<'_>, - key: PreHashObject, - value: pyo3::Py, - ttl: Option, - ) -> pyo3::PyResult<()> { - let expire_at = - ttl.map(|x| std::time::SystemTime::now() + std::time::Duration::from_secs_f64(x)); - let entry_size = crate::common::entry_size(py, &key, &value)?; - - if entry_size > self.instance.maxmemory.get() { - return Err(pyo3::PyErr::new::( - "The cache has reached the bound", - )); - } - - match self.situation { - AbsentSituation::Expired(bucket) => { - // This means the key is available but expired - // So we have to update the values of the old key - // and queue the heap's sort - let old_size = unsafe { bucket.as_ref().as_ref().size }; - - { - let item = unsafe { bucket.as_mut() }; - unsafe { - item.as_mut().expire_at = ttl.map(|x| { - std::time::SystemTime::now() + std::time::Duration::from_secs_f64(x) - }); - item.as_mut().value = value; - item.as_mut().size = entry_size; - } - } - - self.instance.heap.queue_sort(); - self.instance.memory = self - .instance - .memory - .saturating_sub(old_size) - 
.saturating_add(entry_size); - - while self.instance.memory > self.instance.maxmemory.get() { - if self.instance.popitem().is_none() { - break; - } - } - - // Like VTTLPolicyOccupied::update, Here we don't need to change this - // self.instance.observed.change(); - } - AbsentSituation::Slot(slot) => { - self.instance.expire(); // Remove expired pairs to make room for the new pair - - while self.instance.table.len() >= self.instance.maxsize.get() - || self.instance.memory.saturating_add(entry_size) - > self.instance.maxmemory.get() - { - if self.instance.popitem().is_none() { - break; - } - } - - let hash = key.hash; - let node = self - .instance - .heap - .push(TimeToLivePair::new(key, value, expire_at, entry_size)); - - unsafe { - self.instance.table.insert_in_slot(hash, slot, node); - } - - self.instance.memory = self.instance.memory.saturating_add(entry_size); - self.instance.observed.change(); - } - AbsentSituation::None => { - self.instance.expire(); // Remove expired pairs to make room for the new pair - - while self.instance.table.len() >= self.instance.maxsize.get() - || self.instance.memory.saturating_add(entry_size) - > self.instance.maxmemory.get() - { - if self.instance.popitem().is_none() { - break; - } - } - - let hash = key.hash; - let node = self - .instance - .heap - .push(TimeToLivePair::new(key, value, expire_at, entry_size)); - - self.instance - .table - .insert(hash, node, |x| unsafe { x.as_ref().key.hash }); - - self.instance.memory = self.instance.memory.saturating_add(entry_size); - self.instance.observed.change(); - } - } - - Ok(()) - } -} - -unsafe impl Send for VTTLPolicy {} diff --git a/src/pyclasses/base.rs b/src/pyclasses/base.rs new file mode 100644 index 0000000..e69de29 diff --git a/src/pyclasses/mod.rs b/src/pyclasses/mod.rs new file mode 100644 index 0000000..e69de29 From b6eafc6c6f559e6ee5141f458f9c6dd75f667a7c Mon Sep 17 00:00:00 2001 From: awolverp Date: Mon, 18 May 2026 20:08:04 +0330 Subject: [PATCH 03/60] Refactor Cache ( 
not tested ) --- Cargo.lock | 106 +++++- Cargo.toml | 3 +- cachebox/_core.pyi | 313 ++++++++++++++++ src/hashbrown/raw.rs | 37 +- src/internal/alias.rs | 10 +- src/internal/genver.rs | 27 -- src/internal/mod.rs | 1 - src/internal/onceinit.rs | 101 ++++- src/internal/pickle.rs | 723 ++++++++++++++++++++++++++++++++++-- src/internal/utils.rs | 321 ++++++++++++++++ src/lib.rs | 30 +- src/macro_rules.rs | 54 +++ src/policies/mod.rs | 5 + src/policies/nopolicy.rs | 379 +++++++++++++++++++ src/policies/traits.rs | 120 ++++++ src/policies/wrapped.rs | 203 ++++++++++ src/pyclasses/base.rs | 62 ++++ src/pyclasses/cache/mod.rs | 6 + src/pyclasses/cache/sync.rs | 596 +++++++++++++++++++++++++++++ src/pyclasses/mod.rs | 2 + src/typeref.rs | 23 ++ 21 files changed, 3016 insertions(+), 106 deletions(-) delete mode 100644 src/internal/genver.rs create mode 100644 src/policies/traits.rs create mode 100644 src/policies/wrapped.rs create mode 100644 src/pyclasses/cache/mod.rs create mode 100644 src/pyclasses/cache/sync.rs create mode 100644 src/typeref.rs diff --git a/Cargo.lock b/Cargo.lock index e879d6b..a4e2019 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10,23 +10,21 @@ checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" [[package]] name = "cachebox" -<<<<<<< Updated upstream -version = "5.2.3" -======= version = "6.0.0" ->>>>>>> Stashed changes dependencies = [ "cfg-if", "parking_lot", "pyo3", + "pyo3-async-runtimes", "pyo3-build-config", + "tokio", ] [[package]] name = "cc" -version = "1.2.60" +version = "1.2.62" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43c5703da9466b66a946814e1adf53ea2c90f10063b86290cc9eb67ce3478a20" +checksum = "a1dce859f0832a7d088c4f1119888ab94ef4b5d6795d1ce05afb7fe159d79f98" dependencies = [ "find-msvc-tools", "shlex", @@ -44,12 +42,66 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" +[[package]] +name = "futures-channel" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" +dependencies = [ + "futures-core", +] + +[[package]] +name = "futures-core" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" + +[[package]] +name = "futures-macro" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-task" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" + +[[package]] +name = "futures-util" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" +dependencies = [ + "futures-core", + "futures-macro", + "futures-task", + "pin-project-lite", + "slab", +] + [[package]] name = "heck" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "inventory" +version = "0.3.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4f0c30c76f2f4ccee3fe55a2435f691ca00c0e4bd87abe4f4a851b1d4dac39b" +dependencies = [ + "rustversion", +] + [[package]] name = "libc" version = "0.2.186" @@ -94,6 +146,12 @@ dependencies = [ "windows-link", ] +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + [[package]] name = "portable-atomic" version = "1.13.1" @@ -115,6 +173,7 @@ version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "91fd8e38a3b50ed1167fb981cd6fd60147e091784c427b8f7183a7ee32c31c12" dependencies = [ + "inventory", "libc", "once_cell", "portable-atomic", @@ -123,6 +182,20 @@ dependencies = [ "pyo3-macros", ] +[[package]] +name = "pyo3-async-runtimes" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e7364a95bf00e8377bbf9b0f09d7ff9715a29d8fcf93b47d1a967363b973178" +dependencies = [ + "futures-channel", + "futures-util", + "once_cell", + "pin-project-lite", + "pyo3", + "tokio", +] + [[package]] name = "pyo3-build-config" version = "0.28.3" @@ -195,6 +268,12 @@ dependencies = [ "bitflags", ] +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + [[package]] name = "scopeguard" version = "1.2.0" @@ -207,6 +286,12 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + [[package]] name = "smallvec" version = "1.15.1" @@ -230,6 +315,15 @@ version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca" +[[package]] +name = "tokio" +version = "1.52.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fc7f01b389ac15039e4dc9531aa973a135d7a4135281b12d7c1bc79fd57fffe" +dependencies = [ + "pin-project-lite", +] + [[package]] name = 
"unicode-ident" version = "1.0.24" diff --git a/Cargo.toml b/Cargo.toml index b69d46a..3e89e41 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,7 +24,8 @@ strip = true [dependencies] cfg-if = "1.0.4" parking_lot = {version="0.12.5", default-features=false} -pyo3 = {version="0.28.3", default-features=false, features=["macros", "generate-import-lib"]} +pyo3 = {version="0.28.3", default-features=false, features=["macros", "generate-import-lib", "multiple-pymethods"]} +tokio = {version="1.52.3", default-features=false, features=["sync"]} [build-dependencies] pyo3-build-config = {version="0.28.3", default-features=false, features=["resolve-config"]} diff --git a/cachebox/_core.pyi b/cachebox/_core.pyi index e69de29..86aac17 100644 --- a/cachebox/_core.pyi +++ b/cachebox/_core.pyi @@ -0,0 +1,313 @@ +import typing + +from _typeshed import SupportsItems + +KT = typing.TypeVar("KT") +VT = typing.TypeVar("VT") +DT = typing.TypeVar("DT") + +class BaseCacheImpl(typing.Generic[KT, VT]): + """ + Base implementation for cache classes in the cachebox library. + + This abstract base class defines the generic structure for cache implementations, + supporting different key and value types through generic type parameters. + Serves as a foundation for specific cache variants like Cache and FIFOCache. + """ + + def __init__( + self, + maxsize: int, + iterable: ( + typing.Dict[KT, VT] + | SupportsItems[KT, VT] + | typing.Iterable[typing.Tuple[KT, VT]] + | None + ) = None, + *, + capacity: int = 0, + getsizeof: typing.Callable[[KT, VT]] | None = None, + ) -> None: ... + def maxsize(self) -> int: ... + def current_size(self) -> int: ... + def remaining_size(self) -> int: ... + def getsizeof(self) -> typing.Callable[[KT, VT]] | None: ... + def capacity(self) -> int: ... + def __len__(self) -> int: ... + def __sizeof__(self) -> int: ... + def __bool__(self) -> bool: ... + def __contains__(self, key: KT) -> bool: ... + def contains(self, key: KT) -> bool: ... + def is_empty(self) -> bool: ... 
+ def is_full(self) -> bool: ... + def insert( + self, key: KT, value: VT, *args: typing.Any, **kwargs: typing.Any + ) -> typing.Optional[VT]: ... + def __setitem__(self, key: KT, value: VT) -> None: ... + def update( + self, + iterable: ( + typing.Dict[KT, VT] + | SupportsItems[KT, VT] + | typing.Iterable[typing.Tuple[KT, VT]] + ), + *args: typing.Any, + **kwargs: typing.Any, + ) -> None: ... + def get( + self, key: KT, default: typing.Optional[DT] = None + ) -> typing.Union[VT, DT]: ... + def __getitem__(self, key: KT) -> VT: ... + def setdefault( + self, + key: KT, + default: typing.Optional[DT] = None, + *args: typing.Any, + **kwargs: typing.Any, + ) -> typing.Optional[VT | DT]: ... + def pop(self, key: KT, default: DT = ...) -> typing.Union[VT, DT]: ... + def __delitem__(self, key: KT) -> None: ... + def popitem(self) -> typing.Tuple[KT, VT]: ... + def drain(self, n: int) -> int: ... + def shrink_to_fit(self) -> None: ... + def clear(self, *, reuse: bool = False) -> None: ... + def __eq__(self, other: typing.Any) -> bool: ... + def __ne__(self, other: typing.Any) -> bool: ... + def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: ... + def values(self) -> typing.Iterable[VT]: ... + def keys(self) -> typing.Iterable[KT]: ... + def __iter__(self) -> typing.Iterator[KT]: ... + def copy(self) -> typing.Self: ... + def __copy__(self) -> typing.Self: ... + def __repr__(self) -> str: ... + +class Cache(BaseCacheImpl[KT, VT]): + """ + A thread-safe, memory-efficient key-value cache with no eviction policy. + items remain in the cache until manually removed or the cache is cleared. + + ## How It Works + `Cache` is essentially a configurable hashmap-like store. When an item is inserted: + - It is stored directly without any ordering, priority tracking, or access metadata. + - If a maximum size is configured, insertions beyond that limit are rejected (raises OverflowError). + A max size of zero means unlimited. 
+ - All read and write operations are thread-safe, making it safe for concurrent access without + external locking. + + Because no eviction logic runs in the background, there is no overhead from tracking usage order, + frequency counters, or expiry timestamps. + + ### Pros + - Minimal overhead - no bookkeeping for eviction means lower CPU and memory usage per entry compared + to policy-based caches. + - Predictable behavior - items are never silently removed, so cache hits are deterministic once an + item is stored. + - Thread-safe - safe for concurrent reads and writes out of the box. + - Configurable capacity - a hard size limit prevents unbounded memory growth. + + ### Cons + - No automatic eviction - the cache can fill up and stop accepting new entries if a max size is set, + requiring manual management. + - Unordered - unlike a standard dict (Python 3.7+), insertion order is not preserved. + - Not suitable for volatile data - stale entries persist forever unless explicitly invalidated. + + ## When to Use It + `Cache` is the right choice when: + - You have a fixed, well-known set of keys that are expensive to compute and never go stale + (e.g., parsed config values, compiled regex patterns, loaded templates). + - The cached data has no meaningful expiry - it's either always valid or always explicitly invalidated. + - You need the lowest possible overhead and can guarantee the cache won't grow uncontrollably. + + Avoid it when cached data can become stale, when the working set is unpredictable in size, or when you need automatic + memory pressure relief. + """ + + def __init__( + self, + maxsize: int, + iterable: ( + typing.Dict[KT, VT] + | SupportsItems[KT, VT] + | typing.Iterable[typing.Tuple[KT, VT]] + | None + ) = ..., + *, + capacity: int = ..., + getsizeof: typing.Callable[[KT, VT]] | None = ..., + ) -> None: + """ + Initialize a new Cache instance. + + Args: + maxsize: Maximum number of elements the cache can hold. Zero means unlimited. 
+ iterable: Initial data to populate the cache. + capacity: Pre-allocate hash table capacity to minimize reallocations. Defaults to 0. + getsizeof: A callable that computes the size of a key-value pair. When `None`, each + entry is assumed to have a size of 1 (equivalent to `lambda k, v: 1`). + Use this to implement weighted caching — for example, sizing entries by + memory footprint or byte length. + + The cache can be pre-sized via `capacity` to reduce hash table reallocations when + the number of expected entries is known ahead of time. + """ + ... + + def maxsize(self) -> int: + """Returns the specified `maxsize`""" + ... + + def current_size(self) -> int: + """Returns the current total cumulative size consumed by all stored entries.""" + ... + + def remaining_size(self) -> int: + """Returns the remaining size. Equals to `maxsize - current_size`""" + ... + + def getsizeof(self) -> typing.Callable[[KT, VT]] | None: + """Returns the `getsizeof` function""" + ... + + def capacity(self) -> int: + """Returns the number of elements the map can hold without reallocating.""" + ... + + def __len__(self) -> int: + """Returns the number of entries currently in the cache.""" + ... + + def contains(self, key: KT) -> bool: + """ + Returns `true` if the cache contains an entry for `key`. Equals to `key in self`. + + It's recommended to use this method instead of `key in self`. + """ + ... + + def is_empty(self) -> bool: + """Returns `True` if cache is empty. Exactly like `bool(self)`.""" + ... + + def is_full(self) -> bool: + """Returns `True` when the cumulative size has reached the maxsize limit.""" + ... + + def insert(self, key: KT, value: VT) -> typing.Optional[VT]: + """ + Equals to `self[key] = value`, but returns a value: + + - If the cache did not have this key present, None is returned. + - If the cache did have this key present, the value is updated, + and the old value is returned. 
The key is not updated, though; + + It's recommended to use this method instead of `self[key] = value`. + + Note: raises `OverflowError` if the cache reached the maxsize limit, + because this class does not have any algorithm. + """ + ... + + def update( + self, + iterable: ( + typing.Dict[KT, VT] + | SupportsItems[KT, VT] + | typing.Iterable[typing.Tuple[KT, VT]] + ), + ) -> None: + """ + Updates the cache with elements from a dictionary or an iterable object of key/value pairs. + """ + ... + + def get( + self, + key: KT, + default: typing.Optional[DT] = ..., + ) -> typing.Union[VT, DT]: + """ + Retrieves the value for a given key from the cache. + + Returns the value associated with the key if present, otherwise returns the specified default value. + Equivalent to `self[key]`, but provides a fallback default if the key is not found. + + Args: + key: The key to look up in the cache. + default: The value to return if the key is not present in the cache. Defaults to None. + + Returns: + The value associated with the key, or the default value if the key is not found. + """ + ... + + def setdefault( + self, + key: KT, + default: typing.Optional[DT] = None, + *args: typing.Any, + **kwargs: typing.Any, + ) -> typing.Optional[VT | DT]: + """ + Inserts key with a value of default if key is not in the cache. + + Returns the value for key if key is in the cache, else default. + """ + ... + + def pop(self, key: KT, default: DT = ...) -> typing.Union[VT, DT]: + """ + Removes specified key and returns the corresponding value. + + If the key is not found, returns the `default` if given; otherwise, raise a KeyError. + """ + ... + + def popitem(self) -> typing.Tuple[KT, VT]: + """Always raises `NotImplementedError` because `Cache` has neither policy nor algorithm to evict items.""" + ... + + def drain(self, n: int) -> int: + """Calls the `popitem()` `n` times and returns count of removed items.""" + ... 
+ + def shrink_to_fit(self) -> None: + """Shrinks the internal allocation as close to the current length as possible.""" + ... + + def clear(self, *, reuse: bool = False) -> None: + """ + Removes all items from cache. + + If `reuse` is True, will not free the memory for reusing in the future. + """ + ... + + def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: + """ + Returns an iterable object of the cache's items (key-value pairs). + + Notes: + - You should not make any changes in cache while using this iterable object. + - Items are not ordered. + """ + ... + + def keys(self) -> typing.Iterable[KT]: + """ + Returns an iterable object of the cache's keys. + + Notes: + - You should not make any changes in cache while using this iterable object. + - Keys are not ordered. + """ + ... + + def values(self) -> typing.Iterable[VT]: + """ + Returns an iterable object of the cache's values. + + Notes: + - You should not make any changes in cache while using this iterable object. + - Values are not ordered. + """ + ... 
diff --git a/src/hashbrown/raw.rs b/src/hashbrown/raw.rs index 64752d5..f6b2e69 100644 --- a/src/hashbrown/raw.rs +++ b/src/hashbrown/raw.rs @@ -1,6 +1,12 @@ -use super::control::{BitMaskIter, Group, Tag, TagSliceExt}; -use super::scopeguard::{guard, ScopeGuard}; -use super::util::{invalid_mut, likely, unlikely}; +use super::control::BitMaskIter; +use super::control::Group; +use super::control::Tag; +use super::control::TagSliceExt; +use super::scopeguard::guard; +use super::scopeguard::ScopeGuard; +use super::util::invalid_mut; +use super::util::likely; +use super::util::unlikely; use super::TryReserveError; use core::array; use core::iter::FusedIterator; @@ -9,11 +15,14 @@ use core::mem; use core::ptr; use core::ptr::NonNull; use core::slice; -use std::alloc::{handle_alloc_error, Layout}; +use std::alloc::handle_alloc_error; +use std::alloc::Layout; +use super::alloc::do_alloc; #[cfg(test)] use super::alloc::AllocError; -use super::alloc::{do_alloc, Allocator, Global}; +use super::alloc::Allocator; +use super::alloc::Global; #[inline] unsafe fn offset_from(to: *const T, from: *const T) -> usize { @@ -800,7 +809,7 @@ impl RawTable { /// and the former `Tag` for that bucket. #[cfg_attr(feature = "inline-more", inline)] #[expect(clippy::needless_pass_by_value)] - pub unsafe fn remove_tagged(&mut self, item: Bucket) -> (T, usize, Tag) { + pub(crate) unsafe fn remove_tagged(&mut self, item: Bucket) -> (T, usize, Tag) { unsafe { let index = self.bucket_index(&item); let tag = *self.table.ctrl(index); @@ -1083,7 +1092,7 @@ impl RawTable { /// /// This does not check if the given bucket is actually occupied. 
#[cfg_attr(feature = "inline-more", inline)] - pub unsafe fn replace_bucket_with(&mut self, bucket: Bucket, f: F) -> Option + pub(crate) unsafe fn replace_bucket_with(&mut self, bucket: Bucket, f: F) -> Option where F: FnOnce(T) -> Option, { @@ -1164,7 +1173,12 @@ impl RawTable { /// `find_or_find_insert_index`, and no mutation of the table must have /// occurred since that call. #[inline] - pub unsafe fn insert_tagged_at_index(&mut self, tag: Tag, index: usize, value: T) -> Bucket { + pub(crate) unsafe fn insert_tagged_at_index( + &mut self, + tag: Tag, + index: usize, + value: T, + ) -> Bucket { unsafe { let old_ctrl = *self.table.ctrl(index); self.table.record_item_insert_at(index, old_ctrl, tag); @@ -4408,8 +4422,11 @@ mod test_map { #[test] #[cfg(panic = "unwind")] fn test_catch_panic_clone_from() { - use super::{AllocError, Allocator, Global}; - use core::sync::atomic::{AtomicI8, Ordering}; + use super::AllocError; + use super::Allocator; + use super::Global; + use core::sync::atomic::AtomicI8; + use core::sync::atomic::Ordering; use std::sync::Arc; use std::thread; use std::vec::Vec; diff --git a/src/internal/alias.rs b/src/internal/alias.rs index cb2ac4f..6457bb7 100644 --- a/src/internal/alias.rs +++ b/src/internal/alias.rs @@ -1,3 +1,5 @@ +//! There are type aliases that are used whole the library + /// Type alias for `pyo3::Py` pub type PyObject = pyo3::Py; @@ -5,9 +7,9 @@ pub type PyObject = pyo3::Py; pub type BoundObject<'a> = pyo3::Bound<'a, pyo3::PyAny>; /// Type alias for `&'a pyo3::Bound<'a, pyo3::types::PyTuple>`. -/// Use it directly as `args` argument type. -pub type BoundArgs<'a> = &'a pyo3::Bound<'a, pyo3::types::PyTuple>; +/// Use it directly as `*args` argument type. +pub type ArgsType<'a> = &'a pyo3::Bound<'a, pyo3::types::PyTuple>; /// Type alias for `&'a pyo3::Bound<'a, pyo3::types::PyDict>`. -/// Use it directly as `kwds` argument type. 
-pub type BoundKwargs<'a> = &'a pyo3::Bound<'a, pyo3::types::PyDict>; +/// Use it directly as `**kwds` argument type. +pub type KwdsType<'a> = &'a pyo3::Bound<'a, pyo3::types::PyDict>; diff --git a/src/internal/genver.rs b/src/internal/genver.rs deleted file mode 100644 index 8f161f1..0000000 --- a/src/internal/genver.rs +++ /dev/null @@ -1,27 +0,0 @@ -use std::sync::atomic; -use std::sync::Arc; - -/// Generation version implementation -/// -/// Very useful for checking changes while iteration, like what CPython does; -/// because we can't use lifetimes. -#[derive(Debug, Clone)] -#[repr(transparent)] -pub struct GenerationVersion(Arc); - -impl GenerationVersion { - #[inline] - pub fn new() -> Self { - Self(Default::default()) - } - - #[inline] - pub fn increment(&self) -> u32 { - self.0.fetch_add(1, atomic::Ordering::SeqCst) - } - - #[inline] - pub fn get(&self) -> u32 { - self.0.load(atomic::Ordering::Relaxed) - } -} diff --git a/src/internal/mod.rs b/src/internal/mod.rs index c4c32d5..ffb56f2 100644 --- a/src/internal/mod.rs +++ b/src/internal/mod.rs @@ -1,5 +1,4 @@ pub mod alias; -pub mod genver; pub mod onceinit; pub mod pickle; pub mod utils; diff --git a/src/internal/onceinit.rs b/src/internal/onceinit.rs index ae1bde0..0445db0 100644 --- a/src/internal/onceinit.rs +++ b/src/internal/onceinit.rs @@ -1,29 +1,80 @@ +//! According to PyO3 updates, we can write `__init__` methods inside the Rust, which allows developers +//! to use classes as subclass in Python. +//! +//! All of classes must implement `__new__` and `__init__` methods. +//! - In `__new__` methods, we should allocate memory for the type; +//! - And in `__init__` methods, we should initialize and construct the type, according to parameters. +//! +//! There are types that help us to create these methods completely thread-safe. 
+ use std::cell; use std::mem; use std::sync::atomic; +use std::sync::Arc; const UNINIT: u8 = 0; const RUNNING: u8 = 1; const INIT: u8 = 2; -#[repr(align(64))] -pub struct OnceInit { +pub struct OnceInitInner { + /// Tracks the lifecycle of the inner value: + /// `UNINIT` → `RUNNING` (mid-write) → `INIT` (ready). state: atomic::AtomicU8, - value: cell::UnsafeCell>>, + /// Heap-allocated storage that is uninitialized until [`set`](OnceInit::set) completes. + /// Wrapped in a [`std::sync::Mutex`] so that post-init access is safe across threads. + value: cell::UnsafeCell>>, } +/// A thread-safe, write-once container for PyO3 `__new__` / `__init__` two-phase construction. +/// +/// PyO3 splits Python object creation into two steps: +/// - `__new__` allocates the Rust-side storage (calls [`OnceInit::uninit`]), +/// - `__init__` fills it in exactly once (calls [`OnceInit::set`]). +/// +/// After initialisation the inner value is accessible through a [`std::sync::MutexGuard`] +/// via [`OnceInit::lock`], which is safe to call from multiple threads simultaneously. +#[repr(transparent)] +pub struct OnceInit(Arc>); + impl OnceInit { + /// Creates a new, **uninitialized** [`OnceInit`]. + /// + /// Intended to be called from the PyO3 `__new__` handler to allocate the + /// object slot before Python passes arguments to `__init__`. + /// + /// The returned value must not be accessed via [`lock`](Self::lock) + /// until [`set`](Self::set) has been called. #[inline] pub fn uninit() -> Self { - Self { + OnceInitInner { state: atomic::AtomicU8::new(UNINIT), value: cell::UnsafeCell::new(mem::MaybeUninit::uninit()), } + .into() + } + + /// Creates a new **initialized** [`OnceInit`]. + #[inline] + pub fn new(val: T) -> Self { + OnceInitInner { + state: atomic::AtomicU8::new(INIT), + value: cell::UnsafeCell::new(mem::MaybeUninit::new(std::sync::Mutex::new(val))), + } + .into() } + /// Initializes the container with `val`, transitioning state from `UNINIT` to `INIT`. 
+ /// + /// Intended to be called from the PyO3 `__init__` handler once the Python-side + /// arguments have been validated and the Rust value can be constructed. + /// + /// # Panics + /// + /// Panics if `set` has already been called on this instance. #[inline] pub fn set(&self, val: T) { if self + .0 .state .compare_exchange( UNINIT, @@ -36,40 +87,68 @@ impl OnceInit { already_init_panic(); } // SAFETY: we own the RUNNING token — no other thread can write value. - unsafe { (*self.value.get()).write(parking_lot::Mutex::new(val)) }; - self.state.store(INIT, atomic::Ordering::Release); + unsafe { (*self.0.value.get()).write(std::sync::Mutex::new(val)) }; + self.0.state.store(INIT, atomic::Ordering::Release); } + /// Locks the inner [`std::sync::Mutex`] and returns a guard that dereferences to `T`. + /// + /// This is the primary read/write accessor after initialization. Multiple threads + /// may call `lock` concurrently; they will be serialized by the inner mutex. + /// + /// # Panics + /// + /// Panics if called before [`set`](Self::set) has completed. #[inline] - pub fn lock(&self) -> parking_lot::MutexGuard<'_, T> { - if std::hint::likely(self.state.load(atomic::Ordering::Acquire) == INIT) { + pub fn lock(&self) -> std::sync::MutexGuard<'_, T> { + if std::hint::likely(self.0.state.load(atomic::Ordering::Acquire) == INIT) { // SAFETY: state == INIT guarantees `value` was fully written and is valid. - unsafe { (*self.value.get()).assume_init_ref().lock() } + unsafe { (*self.0.value.get()).assume_init_ref().lock().unwrap() } } else { not_init_panic() } } } +impl Clone for OnceInit { + fn clone(&self) -> Self { + Self(Arc::clone(&self.0)) + } +} + +impl From> for OnceInit { + fn from(value: OnceInitInner) -> Self { + Self(Arc::new(value)) + } +} + // SAFETY: Mutex is Send+Sync when T: Send; we uphold the init invariant ourselves. 
unsafe impl Send for OnceInit {} unsafe impl Sync for OnceInit {} impl Drop for OnceInit { + /// Drops the inner value if and only if [`set`](OnceInit::set) was called. + /// + /// Reads the state flag without atomic synchronisation. NOTE(review): `&mut self` is exclusive + /// only for this handle; clones share the same `Arc`-held inner value, so confirm this is sound. fn drop(&mut self) { - if *self.state.get_mut() == INIT { + if unsafe { *self.0.state.as_ptr() == INIT } { // SAFETY: state == INIT means value was written and not yet dropped. - unsafe { (*self.value.get()).assume_init_drop() } + unsafe { (*self.0.value.get()).assume_init_drop() } } } } +/// Marked `#[cold]` and `#[inline(never)]` so it is compiled as a separate, +/// rarely-executed stub and does not bloat the hot path of [`lock`](OnceInit::lock). #[cold] #[inline(never)] fn not_init_panic() -> ! { panic!("Object not initialized (__init__ not called)") } +/// Marked `#[cold]` and `#[inline(never)]` so it is compiled as a separate, +/// rarely-executed stub and does not bloat the hot path of [`set`](OnceInit::set). #[cold] #[inline(never)] fn already_init_panic() -> ! { diff --git a/src/internal/pickle.rs b/src/internal/pickle.rs index 873985a..55f6f2a 100644 --- a/src/internal/pickle.rs +++ b/src/internal/pickle.rs @@ -1,73 +1,720 @@ +//! There are utilities for creating and loading pickle states and objects. + use std::ptr; use crate::internal::alias; -/// Pickle object -pub struct Pickle( - // Always is tuple - alias::PyObject, -); +/// A simple Python scalar value. +/// +/// | Rust type | Python type | +/// |-----------|-------------| +/// | `usize` | `int` | +/// | `isize` | `int` | +/// | `f64` | `float` | +/// | `bool` | `bool` | +/// | `&str` | `str` | +/// +/// [`PyVal::None`] maps to Python's `None`. 
+#[derive(Debug, Clone, Copy)] +pub enum PyVal<'a> { + Unsigned(usize), + Signed(isize), + Float(f64), + Bool(bool), + Str(&'a str), + None, +} -pub struct PickleBuilder { - // Always is tuple - tuple: ptr::NonNull, - size: isize, - current: isize, +impl From for PyVal<'static> { + fn from(v: usize) -> Self { + PyVal::Unsigned(v) + } +} +impl From for PyVal<'static> { + fn from(v: isize) -> Self { + PyVal::Signed(v) + } +} +impl From for PyVal<'static> { + fn from(v: f64) -> Self { + PyVal::Float(v) + } +} +impl From for PyVal<'static> { + fn from(v: bool) -> Self { + PyVal::Bool(v) + } +} +impl<'a> From<&'a str> for PyVal<'a> { + fn from(v: &'a str) -> Self { + PyVal::Str(v) + } } -impl Pickle { - pub fn builder(py: pyo3::Python, size: isize) -> pyo3::PyResult { - let tuple = unsafe { pyo3::ffi::PyTuple_New(size) }; +impl<'a> PyVal<'a> { + /// Allocate a fresh owned Python object. The caller is responsible for + /// exactly one `Py_DECREF` (or transferring ownership to a container). + pub(crate) unsafe fn into_py_raw( + self, + py: pyo3::Python<'_>, + ) -> pyo3::PyResult<*mut pyo3::ffi::PyObject> { + let ptr = match self { + PyVal::Unsigned(v) => pyo3::ffi::PyLong_FromSize_t(v), + PyVal::Signed(v) => pyo3::ffi::PyLong_FromSsize_t(v), + PyVal::Float(v) => pyo3::ffi::PyFloat_FromDouble(v), + PyVal::Bool(v) => { + // Py_True / Py_False are singletons; INCREF to hand out our own ref. 
+ let raw = if v { + pyo3::ffi::Py_True() + } else { + pyo3::ffi::Py_False() + }; + pyo3::ffi::Py_INCREF(raw); + raw + } + PyVal::Str(v) => pyo3::ffi::PyUnicode_FromStringAndSize( + v.as_ptr() as *const std::os::raw::c_char, + v.len() as isize, + ), + PyVal::None => { + let raw = pyo3::ffi::Py_None(); + pyo3::ffi::Py_INCREF(raw); + raw + } + }; - if tuple.is_null() { + if ptr.is_null() { Err(pyo3::PyErr::fetch(py)) } else { - Ok(PickleBuilder { - tuple: unsafe { ptr::NonNull::new_unchecked(tuple) }, - size, - current: 0, - }) + Ok(ptr) } } } +/// A finalised pickle state — an immutable wrapper around a Python tuple. +/// +/// Construct with [`Pickle::builder`]. +/// +/// # Immutable access +/// +/// `Pickle` implements [`Deref`] and [`AsRef`] targeting the inner +/// [`alias::PyObject`], so you can pass it wherever a `PyObject` reference is +/// expected without an explicit conversion. Typed access is available via +/// [`Pickle::as_object`] and [`Pickle::as_tuple`]. +/// +/// [`Deref`]: std::ops::Deref +pub struct Pickle(alias::PyObject); + +impl Pickle { + /// Begin building a top-level pickle tuple with exactly `size` slots. + pub fn builder(py: pyo3::Python<'_>, size: isize) -> pyo3::PyResult { + PickleBuilder::new(py, size) + } + + /// Borrow the inner [`alias::PyObject`] without consuming `self`. + #[inline] + pub fn as_object(&self) -> &alias::PyObject { + &self.0 + } +} + +impl std::ops::Deref for Pickle { + type Target = alias::PyObject; + + #[inline] + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl AsRef for Pickle { + #[inline] + fn as_ref(&self) -> &alias::PyObject { + &self.0 + } +} + impl From for alias::PyObject { - fn from(value: Pickle) -> Self { - value.0 + fn from(v: Pickle) -> Self { + v.0 } } +// All three sequence-like builders (PickleBuilder, TupleBuilder, ListBuilder) +// expose the same `push` / `push_tuple` / `push_list` / `push_dict` surface. +// Rather than repeating three times, we generate them with a macro. 
+// +// Each builder must provide an **inherent** method: +// +// unsafe fn push_owned_impl( +// &mut self, +// py: pyo3::Python<'_>, +// item: *mut pyo3::ffi::PyObject, // caller hands over ownership +// ) -> pyo3::PyResult<()> + +macro_rules! impl_push_methods { + ($ty:ident) => { + impl $ty { + /// Push a scalar [`PyVal`] (or anything that converts `Into`). + /// + /// ```rust,ignore + /// builder.push(py, 42isize)? + /// .push(py, "hello")? + /// .push(py, 3.14f64)?; + /// ``` + pub fn push<'a, V>(&mut self, py: pyo3::Python<'_>, val: V) -> pyo3::PyResult<&mut Self> + where + V: Into>, + { + let raw = unsafe { val.into().into_py_raw(py)? }; + unsafe { + self.push_owned_impl(py, raw)?; + } + Ok(self) + } + + /// Push a nested tuple whose items are filled by the closure `f`. + /// + /// `size` must equal the exact number of items `f` will push. + /// + /// ```rust,ignore + /// builder.push_tuple(py, 2, |t| { + /// t.push(py, 3isize)?.push(py, 4isize)?; + /// Ok(()) + /// })?; + /// ``` + pub fn push_tuple( + &mut self, + py: pyo3::Python<'_>, + size: isize, + f: F, + ) -> pyo3::PyResult<&mut Self> + where + F: FnOnce(&mut TupleBuilder) -> pyo3::PyResult<()>, + { + let mut b = TupleBuilder::new(py, size)?; + f(&mut b)?; + // into_raw transfers ownership; Drop becomes a no-op. + unsafe { + self.push_owned_impl(py, b.into_raw())?; + } + Ok(self) + } + + /// Push a nested list whose items are filled by the closure `f`. + /// + /// ```rust,ignore + /// builder.push_list(py, |l| { + /// l.push(py, 1isize)?.push(py, "A")?; + /// Ok(()) + /// })?; + /// ``` + pub fn push_list(&mut self, py: pyo3::Python<'_>, f: F) -> pyo3::PyResult<&mut Self> + where + F: FnOnce(&mut ListBuilder) -> pyo3::PyResult<()>, + { + let mut b = ListBuilder::new(py)?; + f(&mut b)?; + unsafe { + self.push_owned_impl(py, b.into_raw())?; + } + Ok(self) + } + + /// Push a nested dict whose entries are filled by the closure `f`. 
+ /// + /// ```rust,ignore + /// builder.push_dict(py, |d| { + /// d.entry(py, "key", 42isize)?; + /// Ok(()) + /// })?; + /// ``` + pub fn push_dict(&mut self, py: pyo3::Python<'_>, f: F) -> pyo3::PyResult<&mut Self> + where + F: FnOnce(&mut DictBuilder) -> pyo3::PyResult<()>, + { + let mut b = DictBuilder::new(py)?; + f(&mut b)?; + unsafe { + self.push_owned_impl(py, b.into_raw())?; + } + Ok(self) + } + } + }; +} + +/// Builds the top-level Python tuple that represents a pickle state. +/// +/// All slots **must** be filled before calling [`finish`](PickleBuilder::finish). +/// In debug builds an assertion verifies this; the tuple is otherwise valid but +/// partially initialised (CPython represents unfilled slots as `NULL`). +/// +/// If the builder is dropped before `finish` is called, the partially-built +/// tuple is correctly decreffed and all already-inserted items are released. +/// +/// # Example +/// +/// Reproduces `(4567, 23343, {3: 4, "a": 39, "AA": (3, 4)}, [2, 3, 4, (4, 5), "A"])`: +/// +/// ```rust,ignore +/// let pickle = Pickle::builder(py, 4)? +/// .push(py, 4567usize)? +/// .push(py, 23343usize)? +/// .push_dict(py, |d| { +/// d.entry(py, 3isize, 4isize)? +/// .entry(py, "a", 39isize)? +/// .entry_tuple(py, "AA", 2, |t| { +/// t.push(py, 3isize)?.push(py, 4isize)?; +/// Ok(()) +/// })?; +/// Ok(()) +/// })? +/// .push_list(py, |l| { +/// l.push(py, 2isize)? +/// .push(py, 3isize)? +/// .push(py, 4isize)? +/// .push_tuple(py, 2, |t| { +/// t.push(py, 4isize)?.push(py, 5isize)?; +/// Ok(()) +/// })? +/// .push(py, "A")?; +/// Ok(()) +/// })? +/// .finish(py); +/// ``` +pub struct PickleBuilder { + /// `None` only after `finish()` has transferred ownership. 
+ inner: Option>, + size: isize, + current: isize, +} + impl PickleBuilder { - pub fn unsigned(&mut self, val: usize) -> &mut Self { - debug_assert!(self.current < self.size); + fn new(py: pyo3::Python<'_>, size: isize) -> pyo3::PyResult { + let raw = unsafe { pyo3::ffi::PyTuple_New(size) }; + if raw.is_null() { + return Err(pyo3::PyErr::fetch(py)); + } + Ok(Self { + inner: Some(unsafe { ptr::NonNull::new_unchecked(raw) }), + size, + current: 0, + }) + } - unsafe { - let x = pyo3::ffi::PyLong_FromSize_t(val); - debug_assert!(!x.is_null()); + /// # Reference-count contract + /// `PyTuple_SetItem` **steals** `item` on success and **decrefs** it on + /// failure, so this function must not touch `item`'s refcount after the call. + unsafe fn push_owned_impl( + &mut self, + py: pyo3::Python<'_>, + item: *mut pyo3::ffi::PyObject, + ) -> pyo3::PyResult<()> { + debug_assert!( + self.current < self.size, + "PickleBuilder: pushed more items than `size`" + ); + let ptr = self.inner.expect("PickleBuilder already consumed").as_ptr(); + if pyo3::ffi::PyTuple_SetItem(ptr, self.current, item) != 0 { + // item was already decreffed by PyTuple_SetItem on failure + return Err(pyo3::PyErr::fetch(py)); + } + self.current += 1; + Ok(()) + } + + /// Finalise the builder into a [`Pickle`]. + /// + /// # Panics (debug only) + /// Panics if some slots were never filled. + pub fn finish(mut self, py: pyo3::Python<'_>) -> Pickle { + debug_assert_eq!( + self.current, + self.size, + "PickleBuilder::finish called with {} unfilled slot(s)", + self.size - self.current, + ); + // Take ownership — Drop will be a no-op (inner == None). 
+ let ptr = self + .inner + .take() + .expect("PickleBuilder already consumed") + .as_ptr(); + let bound = unsafe { pyo3::Bound::from_owned_ptr(py, ptr) }; + Pickle(bound.unbind()) + } +} + +impl_push_methods!(PickleBuilder); - debug_assert!(pyo3::ffi::PyTuple_SetItem(self.tuple.as_ptr(), self.current, x) == 0); +impl Drop for PickleBuilder { + fn drop(&mut self) { + // Releases the tuple and all items already inserted into it. + if let Some(nn) = self.inner.take() { + unsafe { + pyo3::ffi::Py_DECREF(nn.as_ptr()); + } } + } +} +/// Builds a Python tuple for embedding inside another container. +/// +/// Can also be used standalone via [`TupleBuilder::build`], which returns a +/// plain [`alias::PyObject`] (a Python `tuple`). +pub struct TupleBuilder { + inner: Option>, + size: isize, + current: isize, +} + +impl TupleBuilder { + /// Allocate a new tuple with `size` pre-allocated slots. + pub fn new(py: pyo3::Python<'_>, size: isize) -> pyo3::PyResult { + let raw = unsafe { pyo3::ffi::PyTuple_New(size) }; + if raw.is_null() { + return Err(pyo3::PyErr::fetch(py)); + } + Ok(Self { + inner: Some(unsafe { ptr::NonNull::new_unchecked(raw) }), + size, + current: 0, + }) + } + + /// Consume the builder and surrender ownership of the raw pointer to the + /// caller (used internally to insert into a parent container). + pub(crate) fn into_raw(mut self) -> *mut pyo3::ffi::PyObject { + // Drop becomes a no-op because `inner` is now None. 
+ self.inner + .take() + .expect("TupleBuilder already consumed") + .as_ptr() + } + + unsafe fn push_owned_impl( + &mut self, + py: pyo3::Python<'_>, + item: *mut pyo3::ffi::PyObject, + ) -> pyo3::PyResult<()> { + debug_assert!( + self.current < self.size, + "TupleBuilder: pushed more items than `size`" + ); + let ptr = self.inner.expect("TupleBuilder already consumed").as_ptr(); + if pyo3::ffi::PyTuple_SetItem(ptr, self.current, item) != 0 { + return Err(pyo3::PyErr::fetch(py)); + } self.current += 1; - self + Ok(()) + } + + /// Finalise into a standalone Python tuple object. + /// + /// # Panics (debug only) + /// Panics if some slots were never filled. + pub fn build(mut self, py: pyo3::Python<'_>) -> alias::PyObject { + debug_assert_eq!( + self.current, + self.size, + "TupleBuilder::build called with {} unfilled slot(s)", + self.size - self.current, + ); + let ptr = self + .inner + .take() + .expect("TupleBuilder already consumed") + .as_ptr(); + let bound = unsafe { pyo3::Bound::from_owned_ptr(py, ptr) }; + bound.unbind() + } +} + +impl_push_methods!(TupleBuilder); + +impl Drop for TupleBuilder { + fn drop(&mut self) { + if let Some(nn) = self.inner.take() { + unsafe { + pyo3::ffi::Py_DECREF(nn.as_ptr()); + } + } + } +} + +/// Builds a Python list of arbitrary length. +/// +/// Unlike [`TupleBuilder`], no size is required upfront; items are appended +/// one by one via [`PyList_Append`]. +pub struct ListBuilder { + /// `None` only after `into_raw()` or `build()`. + inner: Option>, +} + +impl ListBuilder { + /// Create a new, empty list. 
+ pub fn new(py: pyo3::Python<'_>) -> pyo3::PyResult { + let raw = unsafe { pyo3::ffi::PyList_New(0) }; + if raw.is_null() { + return Err(pyo3::PyErr::fetch(py)); + } + Ok(Self { + inner: Some(unsafe { ptr::NonNull::new_unchecked(raw) }), + }) + } + + pub(crate) fn into_raw(mut self) -> *mut pyo3::ffi::PyObject { + self.inner + .take() + .expect("ListBuilder already consumed") + .as_ptr() + } + + /// # Reference-count contract + /// `PyList_Append` does **not** steal `item`; it increments `item`'s refcount + /// on success. We therefore always decref our owned ref after the call, + /// regardless of success or failure. + unsafe fn push_owned_impl( + &mut self, + py: pyo3::Python<'_>, + item: *mut pyo3::ffi::PyObject, + ) -> pyo3::PyResult<()> { + let ptr = self.inner.expect("ListBuilder already consumed").as_ptr(); + let result = pyo3::ffi::PyList_Append(ptr, item); + pyo3::ffi::Py_DECREF(item); // release our owned ref in all cases + if result != 0 { + return Err(pyo3::PyErr::fetch(py)); + } + Ok(()) + } + + /// Finalise into a standalone Python list object. + pub fn build(mut self, py: pyo3::Python<'_>) -> alias::PyObject { + let ptr = self + .inner + .take() + .expect("ListBuilder already consumed") + .as_ptr(); + let bound = unsafe { pyo3::Bound::from_owned_ptr(py, ptr) }; + bound.unbind() + } +} + +impl_push_methods!(ListBuilder); + +impl Drop for ListBuilder { + fn drop(&mut self) { + if let Some(nn) = self.inner.take() { + unsafe { + pyo3::ffi::Py_DECREF(nn.as_ptr()); + } + } + } +} + +/// Builds a Python dict. +/// +/// Keys must be [`PyVal`] scalars (integers, floats, bools, strings, `None`). +/// Values may be scalars **or** nested containers built via the `entry_tuple`, +/// `entry_list`, and `entry_dict` methods. +/// +/// # Example +/// +/// Reproduces `{3: 4, "a": 39, "AA": (3, 4)}`: +/// +/// ```rust,ignore +/// let obj = DictBuilder::new(py)? +/// .entry(py, 3isize, 4isize)? +/// .entry(py, "a", 39isize)? 
+/// .entry_tuple(py, "AA", 2, |t| { +/// t.push(py, 3isize)?.push(py, 4isize)?; +/// Ok(()) +/// })? +/// .build(py); +/// ``` +pub struct DictBuilder { + inner: Option>, +} + +impl DictBuilder { + /// Create a new, empty dict. + pub fn new(py: pyo3::Python<'_>) -> pyo3::PyResult { + let raw = unsafe { pyo3::ffi::PyDict_New() }; + if raw.is_null() { + return Err(pyo3::PyErr::fetch(py)); + } + Ok(Self { + inner: Some(unsafe { ptr::NonNull::new_unchecked(raw) }), + }) } - pub fn signed(&mut self, val: isize) -> &mut Self { - debug_assert!(self.current < self.size); + pub(crate) fn into_raw(mut self) -> *mut pyo3::ffi::PyObject { + self.inner + .take() + .expect("DictBuilder already consumed") + .as_ptr() + } + /// # Reference-count contract + /// `PyDict_SetItem` does **not** steal either `key` or `val`. + /// This helper takes ownership of both and decrefs them unconditionally. + unsafe fn set_kv( + &mut self, + py: pyo3::Python<'_>, + key: *mut pyo3::ffi::PyObject, + val: *mut pyo3::ffi::PyObject, + ) -> pyo3::PyResult<()> { + let ptr = self.inner.expect("DictBuilder already consumed").as_ptr(); + let result = pyo3::ffi::PyDict_SetItem(ptr, key, val); + // Always release our owned refs regardless of success/failure. + pyo3::ffi::Py_DECREF(key); + pyo3::ffi::Py_DECREF(val); + if result != 0 { + Err(pyo3::PyErr::fetch(py)) + } else { + Ok(()) + } + } + + /// Insert `key → val` where both are [`PyVal`] scalars. + /// + /// ```rust,ignore + /// d.entry(py, 3isize, 4isize)? + /// .entry(py, "name", "Alice")? 
+ /// .entry(py, true, 1.0f64)?; + /// ``` + pub fn entry<'k, 'v, K, V>( + &mut self, + py: pyo3::Python<'_>, + key: K, + val: V, + ) -> pyo3::PyResult<&mut Self> + where + K: Into>, + V: Into>, + { unsafe { - let x = pyo3::ffi::PyLong_FromSsize_t(val); - debug_assert!(!x.is_null()); + let kptr = key.into().into_py_raw(py)?; + let vptr = match val.into().into_py_raw(py) { + Ok(v) => v, + Err(e) => { + pyo3::ffi::Py_DECREF(kptr); // clean up key we already allocated + return Err(e); + } + }; + self.set_kv(py, kptr, vptr)?; + } + Ok(self) + } - debug_assert!(pyo3::ffi::PyTuple_SetItem(self.tuple.as_ptr(), self.current, x) == 0); + /// Insert `key → (nested tuple)`. + /// + /// ```rust,ignore + /// d.entry_tuple(py, "coords", 2, |t| { + /// t.push(py, 10isize)?.push(py, 20isize)?; + /// Ok(()) + /// })?; + /// ``` + pub fn entry_tuple<'k, K, F>( + &mut self, + py: pyo3::Python<'_>, + key: K, + size: isize, + f: F, + ) -> pyo3::PyResult<&mut Self> + where + K: Into>, + F: FnOnce(&mut TupleBuilder) -> pyo3::PyResult<()>, + { + let mut b = TupleBuilder::new(py, size)?; + f(&mut b)?; + let vptr = b.into_raw(); // transfer ownership out of TupleBuilder + unsafe { + let kptr = match key.into().into_py_raw(py) { + Ok(k) => k, + Err(e) => { + pyo3::ffi::Py_DECREF(vptr); // release value we built + return Err(e); + } + }; + self.set_kv(py, kptr, vptr)?; } + Ok(self) + } - self.current += 1; - self + /// Insert `key → [nested list]`. 
+ pub fn entry_list<'k, K, F>( + &mut self, + py: pyo3::Python<'_>, + key: K, + f: F, + ) -> pyo3::PyResult<&mut Self> + where + K: Into>, + F: FnOnce(&mut ListBuilder) -> pyo3::PyResult<()>, + { + let mut b = ListBuilder::new(py)?; + f(&mut b)?; + let vptr = b.into_raw(); + unsafe { + let kptr = match key.into().into_py_raw(py) { + Ok(k) => k, + Err(e) => { + pyo3::ffi::Py_DECREF(vptr); + return Err(e); + } + }; + self.set_kv(py, kptr, vptr)?; + } + Ok(self) } - pub fn finish(self, py: pyo3::Python) -> Pickle { - let bound = unsafe { pyo3::Bound::from_owned_ptr(py, self.tuple.as_ptr()) }; - Pickle(bound.unbind()) + /// Insert `key → {nested dict}`. + pub fn entry_dict<'k, K, F>( + &mut self, + py: pyo3::Python<'_>, + key: K, + f: F, + ) -> pyo3::PyResult<&mut Self> + where + K: Into>, + F: FnOnce(&mut DictBuilder) -> pyo3::PyResult<()>, + { + let mut b = DictBuilder::new(py)?; + f(&mut b)?; + let vptr = b.into_raw(); + unsafe { + let kptr = match key.into().into_py_raw(py) { + Ok(k) => k, + Err(e) => { + pyo3::ffi::Py_DECREF(vptr); + return Err(e); + } + }; + self.set_kv(py, kptr, vptr)?; + } + Ok(self) + } + + /// Finalise into a standalone Python dict object. + pub fn build(mut self, py: pyo3::Python<'_>) -> alias::PyObject { + let ptr = self + .inner + .take() + .expect("DictBuilder already consumed") + .as_ptr(); + let bound = unsafe { pyo3::Bound::from_owned_ptr(py, ptr) }; + bound.unbind() + } +} + +impl Drop for DictBuilder { + fn drop(&mut self) { + if let Some(nn) = self.inner.take() { + unsafe { + pyo3::ffi::Py_DECREF(nn.as_ptr()); + } + } } } diff --git a/src/internal/utils.rs b/src/internal/utils.rs index b5dfba5..46eeda6 100644 --- a/src/internal/utils.rs +++ b/src/internal/utils.rs @@ -1,7 +1,209 @@ +use std::fmt::Write; + +use std::sync::atomic; +use std::sync::Arc; + +use crate::internal::alias; + +/// Tries to hash `arg1`. +/// +/// # Safety +/// Pointer must be valid, non-null, live Python objects. 
+#[inline(always)] +#[optimize(speed)] +pub unsafe fn pyobject_hash( + py: pyo3::Python<'_>, + arg1: *mut pyo3::ffi::PyObject, +) -> pyo3::PyResult { + let py_hash = pyo3::ffi::PyObject_Hash(arg1); + if std::hint::unlikely(py_hash == -1) { + // SAFETY: PyObject_Hash never returns -1 on success. + return Err(pyo3::PyErr::take(py).unwrap_unchecked()); + } + + Ok(py_hash as u64) +} + +/// Pointer-equality fast path, then Python `==`. +/// +/// # Safety +/// Both pointers must be valid, non-null, live Python objects. +#[inline(always)] +#[optimize(speed)] +pub unsafe fn pyobject_equal( + py: pyo3::Python<'_>, + arg1: *mut pyo3::ffi::PyObject, + arg2: *mut pyo3::ffi::PyObject, +) -> pyo3::PyResult { + if std::ptr::eq(arg1, arg2) { + return Ok(true); + } + + let boolean = pyo3::ffi::PyObject_RichCompareBool(arg1, arg2, pyo3::ffi::Py_EQ); + + if boolean < 0 { + Err(pyo3::PyErr::take(py).unwrap_unchecked()) + } else { + Ok(boolean == 1) + } +} + +/// Calls a Python `getsizeof(key, value) -> int` callable via raw FFI for maximum performance. +/// +/// # Errors +/// Propagates any Python exception raised by `getsizeof`, and also returns a `PyErr` if: +/// - the return value is not an integer +/// - `PyLong_AsSsize_t` returns `-1` with a live Python exception (overflow / type error) +#[inline(always)] +#[optimize(speed)] +pub unsafe fn call_getsizeof( + py: pyo3::Python<'_>, + getsizeof: Option<&alias::PyObject>, + key: *mut pyo3::ffi::PyObject, + value: *mut pyo3::ffi::PyObject, +) -> pyo3::PyResult { + if getsizeof.is_none() { + return Ok(1); + } + + // SAFETY: + // - All three pointers are valid, live Python objects for the duration of this call. + // - `PyTuple_New(2)` + `PyTuple_SET_ITEM` is the canonical way to build a + // short-lived call tuple without going through Python's allocator twice. + // - `PyTuple_SET_ITEM` steals a reference, so we `Py_INCREF` key and value first. + // - We own `args` and decrement it after the call. 
+ unsafe { + let getsizeof = getsizeof.unwrap_unchecked(); + + let args = pyo3::ffi::PyTuple_New(2); + if args.is_null() { + return Err(pyo3::PyErr::fetch(py)); + } + + // PyTuple_SET_ITEM steals the reference, so we need to increment first. + pyo3::ffi::Py_INCREF(key); + pyo3::ffi::Py_INCREF(value); + pyo3::ffi::PyTuple_SET_ITEM(args, 0, key); + pyo3::ffi::PyTuple_SET_ITEM(args, 1, value); + + let result = pyo3::ffi::PyObject_Call(getsizeof.as_ptr(), args, std::ptr::null_mut()); + pyo3::ffi::Py_DECREF(args); + + if result.is_null() { + return Err(pyo3::PyErr::fetch(py)); + } + + // PyLong_AsSsize_t returns -1 on error. + // It never allocates and is the fastest int extraction path. + let size = pyo3::ffi::PyLong_AsSsize_t(result); + pyo3::ffi::Py_DECREF(result); + + if size == -1 { + if let Some(err) = pyo3::PyErr::take(py) { + return Err(err); + } + } + + Ok(size as usize) + } +} + +/// Formats an iterator of key-value pairs into a string representation. +/// +/// Very useful for implementing `__repr__` methods. 
+pub fn items_to_str(items: I, length: usize) -> Result +where + K: std::fmt::Debug, + V: std::fmt::Debug, + I: IntoIterator, +{ + const EDGE: usize = 50; + const LIMIT: usize = EDGE * 2; + + let mut out = String::with_capacity(64 + length.min(LIMIT) * 16); + out.write_char('{')?; + + // Fast path + if length <= LIMIT { + for (i, (k, v)) in items.into_iter().enumerate() { + if i > 0 { + out.write_str(", ")?; + } + + write!(out, "{k:?}:{v:?}")?; + } + out.write_char('}')?; + + return Ok(out); + } + + let mut iter = items.into_iter(); + + for i in 0..EDGE { + if let Some((k, v)) = iter.next() { + if i > 0 { + out.write_str(", ")?; + } + write!(out, "{k:?}:{v:?}")?; + } + } + + let mut ring: Vec<(K, V)> = Vec::with_capacity(EDGE); + let mut head: usize = 0; + + for item in iter { + if ring.len() < EDGE { + ring.push(item); + } else { + ring[head] = item; + head = (head + 1) % EDGE; + } + } + + let tail_len = ring.len(); + let truncated = length - EDGE - tail_len; + write!(out, ", ... {truncated} truncated ..., ")?; + + for i in 0..tail_len { + let (k, v) = &ring[(head + i) % EDGE]; + if i > 0 { + out.write_str(", ")?; + } + write!(out, "{k:?}:{v:?}")?; + } + + out.write_char('}')?; + Ok(out) +} + +/// Returns the type name of a [`pyo3::ffi::PyObject`]. +/// +/// Returns `""` on failure. +#[inline] +pub fn get_type_name<'a>(py: pyo3::Python<'a>, obj: *mut pyo3::ffi::PyObject) -> String { + use pyo3::types::PyStringMethods; + use pyo3::types::PyTypeMethods; + + unsafe { + let type_ = pyo3::ffi::Py_TYPE(obj); + + if type_.is_null() { + String::from("") + } else { + let obj = pyo3::types::PyType::from_borrowed_type_ptr(py, type_); + + obj.fully_qualified_name() + .map(|x| x.to_string_lossy().into_owned()) + .unwrap_or_else(|_| String::from("")) + } + } +} + /// It can use as PyO3 function argument. When an argument is specified, you will get [`OptionalArgument::Defined`], /// otherwise you will get [`OptionalArgument::Undefined`]. 
/// /// It can be used instead of [`Option`] to improve performance. +#[derive(Debug, Clone)] pub enum OptionalArgument<'a> { /// The argument was not provided by the caller. Undefined, @@ -16,3 +218,122 @@ impl<'a, 'py> pyo3::FromPyObject<'a, 'py> for OptionalArgument<'py> { Ok(Self::Defined(obj.to_owned())) } } + +/// Generation version implementation +/// +/// Very useful for detecting changes during iteration, like what CPython does; +/// because we can't use lifetimes. +/// +/// ```rust +/// let x = GenerationVersion::default(); +/// +/// x.increment(); +/// assert!(x.get() == 1); +/// ``` +#[derive(Debug, Clone, Default)] +#[repr(transparent)] +pub struct GenerationVersion(Arc); + +impl GenerationVersion { + /// Adds one to the counter and returns the value it held before the addition. + #[inline] + pub fn increment(&self) -> u32 { + self.0.fetch_add(1, atomic::Ordering::SeqCst) + } + + /// Returns the current counter value. + #[inline] + pub fn get(&self) -> u32 { + self.0.load(atomic::Ordering::Relaxed) + } +} + +/// A Python object paired with its precomputed hash +/// +/// The hash is obtained once, when the value is created (see +/// [`PrecomputedHashObject::new`]), and stored next to the object, so it can +/// be read back through [`PrecomputedHashObject::hash`] without being computed again. +#[derive(Debug)] +pub struct PrecomputedHashObject { + object: alias::PyObject, + hash: u64, +} + +impl PrecomputedHashObject { + /// Creates a new [`PrecomputedHashObject`] with a pre-calculated hash. + #[inline] + pub fn with_precomputed_hash(object: alias::PyObject, hash: u64) -> Self { + Self { object, hash } + } + + /// Tries to get `object` hash, then creates a new [`PrecomputedHashObject`]. + /// + /// Returns `Err` when computing the hash fails. + #[inline] + pub fn new(py: pyo3::Python<'_>, object: alias::PyObject) -> pyo3::PyResult { + let hash = unsafe { pyobject_hash(py, object.as_ptr())? }; + Ok(Self::with_precomputed_hash(object, hash)) + } + + /// Returns the stored hash value. + pub fn hash(&self) -> u64 { + self.hash + } + + /// Pointer-equality fast path, then Python `==`. 
+ pub fn py_eq(&self, py: pyo3::Python<'_>, other: &Self) -> pyo3::PyResult { + unsafe { pyobject_equal(py, self.object.as_ptr(), other.object.as_ptr()) } + } + + /// Makes a clone of `self`. + /// + /// This creates another pointer to the same object, increasing its reference count. + pub fn clone_ref(&self, py: pyo3::Python<'_>) -> Self { + Self { + object: self.object.clone_ref(py), + hash: self.hash, + } + } +} + +impl AsRef for PrecomputedHashObject { + /// Returns a reference to its pyobject + fn as_ref(&self) -> &alias::PyObject { + &self.object + } +} + +impl From for alias::PyObject { + /// Consumes `PrecomputedHashObject` and returns its pyobject + fn from(value: PrecomputedHashObject) -> Self { + value.object + } +} + +#[derive(Debug)] +#[repr(transparent)] +pub struct GetsizeofFunction(Option); + +impl GetsizeofFunction { + /// Creates a new [`GetsizeofFunction`]. + pub fn new(object: Option) -> Self { + Self(object) + } + + /// Makes a clone of `self`. + /// + /// This creates another pointer to the same object, increasing its reference count. + pub fn clone_ref(&self, py: pyo3::Python<'_>) -> Self { + Self(self.0.as_ref().map(|x| x.clone_ref(py))) + } + + /// Calls the wrapped function to get size of the pair key-value. 
+ pub fn call( + &self, + py: pyo3::Python<'_>, + key: &alias::PyObject, + value: &alias::PyObject, + ) -> pyo3::PyResult { + unsafe { call_getsizeof(py, self.0.as_ref(), key.as_ptr(), value.as_ptr()) } + } +} + +impl From for Option { + fn from(value: GetsizeofFunction) -> Self { + value.0 + } +} diff --git a/src/lib.rs b/src/lib.rs index 5e3ead1..f7d2d99 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,24 +5,38 @@ #[macro_use] mod macro_rules; +mod hashbrown; +mod typeref; -pub mod hashbrown; pub mod internal; +pub mod policies; pub mod pyclasses; #[pyo3::pymodule] mod _core { - // use crate::typeref; + use crate::typeref; - // #[pymodule_export] - // use crate::pyclasses::base::{PyBaseCacheImpl, PyBaseIteratorImpl}; + #[pymodule_export] + use crate::pyclasses::base::PyAsyncBaseCacheImpl; + #[pymodule_export] + use crate::pyclasses::base::PyAsyncBaseIteratorImpl; + #[pymodule_export] + use crate::pyclasses::base::PyBaseCacheImpl; + #[pymodule_export] + use crate::pyclasses::base::PyBaseIteratorImpl; - // #[pymodule_export] - // use crate::pyclasses::cache::{PyCache, PyCacheItems, PyCacheKeys, PyCacheValues}; + #[pymodule_export] + use crate::pyclasses::cache::PyCache; + #[pymodule_export] + use crate::pyclasses::cache::PyCacheItems; + #[pymodule_export] + use crate::pyclasses::cache::PyCacheKeys; + #[pymodule_export] + use crate::pyclasses::cache::PyCacheValues; #[pymodule_init] - pub fn init(_m: &pyo3::Bound<'_, pyo3::types::PyModule>) -> pyo3::PyResult<()> { - // typeref::initialize_typeref(m.py()); + pub fn init(m: &pyo3::Bound<'_, pyo3::types::PyModule>) -> pyo3::PyResult<()> { + typeref::initialize_typeref(m.py()); Ok(()) } } diff --git a/src/macro_rules.rs b/src/macro_rules.rs index e69de29..1ccdf5b 100644 --- a/src/macro_rules.rs +++ b/src/macro_rules.rs @@ -0,0 +1,54 @@ +/// Implements a `#[pyclass]` with pre-defined pyclass arguments. +/// +/// # Example +/// +/// ```ignore +/// implement_pyclass! 
{ +/// [] MyClass as "MyClass" { field: type } +/// } +/// ``` +#[macro_export] +macro_rules! implement_pyclass { + ( + $(#[$outer:meta])* + [$($pyclass_args:tt)*] $struct_name:ident as $python_name:literal $($rest:tt)* + ) => { + #[pyo3::pyclass( + module = "cachebox._core", + name = $python_name, + immutable_type, + skip_from_py_object, + $($pyclass_args)* + )] + $(#[$outer])* + pub struct $struct_name $($rest)* + }; +} + +/// Creates a new [`PyErr`] of the given exception type. +/// +/// The first form forwards `$msg` as the exception argument; the second form +/// builds the message from a format string and its arguments. +#[macro_export] +macro_rules! new_py_error { + ($name:ident, $msg:expr $(,)?) => { + ::pyo3::exceptions::$name::new_err($msg) + }; + ($name:ident, $fmt:expr, $($args:tt)*) => { + ::pyo3::exceptions::$name::new_err( + // An owned `String` is built here: `format_args!` yields a borrowed, + // non-`Send` value that cannot be handed to `new_err`. + format!($fmt, $($args)*) + ) + }; +} + +/// Creates a new std::num::NonZeroUsize safely. Uses `isize::MAX as usize` when `num` is zero. +/// +/// # Usage +/// +/// ```ignore +/// safe_non_zero!(2) -> std::num::NonZeroUsize(2) +/// safe_non_zero!(0) -> std::num::NonZeroUsize(isize::MAX as usize) +/// ``` +#[macro_export] +macro_rules! safe_non_zero { + ($num:expr) => {{ + // Bind once so the argument expression is evaluated a single time. + let num: usize = $num; + std::num::NonZeroUsize::new(if num == 0 { isize::MAX as usize } else { num }).unwrap() + }}; +} diff --git a/src/policies/mod.rs b/src/policies/mod.rs index e69de29..767bb21 100644 --- a/src/policies/mod.rs +++ b/src/policies/mod.rs @@ -0,0 +1,5 @@ +pub mod traits; + +pub mod nopolicy; + +pub mod wrapped; diff --git a/src/policies/nopolicy.rs b/src/policies/nopolicy.rs index e69de29..f7aa25a 100644 --- a/src/policies/nopolicy.rs +++ b/src/policies/nopolicy.rs @@ -0,0 +1,379 @@ +use crate::hashbrown; +use crate::internal::alias; +use crate::internal::utils; +use crate::policies::traits; +use crate::policies::traits::PolicyExt; + +/// A key-value pair with a precomputed hash and combined memory size. 
+/// +/// The `size` field caches the result of `getsizeof(key) + getsizeof(value)` +/// so that [`NoPolicy`] can maintain an accurate `currsize` budget without +/// re-invoking the Python-side sizing function on every access. +pub struct Handle { + /// The cache key together with its precomputed hash, avoiding repeated + /// Python hash calls during table lookups. + key: utils::PrecomputedHashObject, + /// The cached value associated with this key. + value: alias::PyObject, + /// Combined memory footprint of the key and value as reported by `getsizeof`. + size: usize, +} + +impl Handle { + /// Creates a new [`Handle`], which calculates the precomputed hash itself. + pub fn new( + py: pyo3::Python<'_>, + getsizeof: &utils::GetsizeofFunction, + key: alias::PyObject, + value: alias::PyObject, + ) -> pyo3::PyResult { + Self::with_precomputed_hash_key( + py, + getsizeof, + utils::PrecomputedHashObject::new(py, key)?, + value, + ) + } + + /// Creates a new [`Handle`] from an already-hashed key. + /// + /// Prefer this over [`Handle::new`] when the caller has already paid the cost + /// of computing the hash (e.g. during a table lookup that preceded insertion). + pub fn with_precomputed_hash_key( + py: pyo3::Python<'_>, + getsizeof: &utils::GetsizeofFunction, + key: utils::PrecomputedHashObject, + value: alias::PyObject, + ) -> pyo3::PyResult { + let size = getsizeof.call(py, key.as_ref(), &value)?; + Ok(Self { key, value, size }) + } + + /// Consumes `self` and returns the [`utils::PrecomputedHashObject`]. + pub fn into_key(self) -> utils::PrecomputedHashObject { + self.key + } + + /// Returns a reference to the value. + pub fn value(&self) -> &alias::PyObject { + &self.value + } + + /// Consumes `self` and returns the value of the pair. + pub fn into_value(self) -> alias::PyObject { + self.value + } + + /// Consumes `self` and returns the pair. 
+ pub fn into_pair(self) -> (utils::PrecomputedHashObject, alias::PyObject) { + (self.key.into(), self.value) + } + + /// Makes a clone of self. + /// + /// This creates another pointer to the same object, increasing its reference count. + pub fn clone_ref(&self, py: pyo3::Python<'_>) -> Self { + Self { + key: self.key.clone_ref(py), + value: self.value.clone_ref(py), + size: self.size, + } + } +} + +impl traits::HandleExt for Handle { + type Key = utils::PrecomputedHashObject; + + fn key(&self) -> &utils::PrecomputedHashObject { + &self.key + } + + fn size(&self) -> usize { + self.size + } +} + +/// A view into an occupied entry in [`NoPolicy`]. +/// +/// Holds a mutable reference to the parent policy and a raw bucket pointer +/// to the existing [`Handle`], enabling in-place removal or replacement without +/// an additional lookup. +pub struct Occupied<'a> { + /// The parent storage that owns the hash table. + policy: &'a mut NoPolicy, + /// Raw bucket pointing to the occupied slot within the hash table. 
+ bucket: hashbrown::raw::Bucket, +} + +impl traits::EntryExt for Occupied<'_> { + type Handle = Handle; + + fn would_exceed(&self, extra_size: usize) -> bool { + let handle = unsafe { self.bucket.as_ref() }; + + self.policy + .currsize + .saturating_add(extra_size) + .saturating_sub(handle.size) + >= self.policy.maxsize.get() + } + + fn evict(&mut self) -> pyo3::PyResult { + self.policy.evict() + } +} + +impl traits::OccupiedExt for Occupied<'_> { + fn remove(self) -> Self::Handle { + let (h, _) = unsafe { self.policy.table.remove(self.bucket) }; + self.policy.currsize = self.policy.currsize.saturating_sub(h.size); + self.policy.gv.increment(); + h + } + + fn replace(self, new: Self::Handle) -> Self::Handle { + self.policy.currsize = self.policy.currsize.saturating_add(new.size); + let old = unsafe { std::mem::replace(self.bucket.as_mut(), new) }; + self.policy.currsize = self.policy.currsize.saturating_sub(old.size); + old + } +} + +/// A view into a vacant slot in [`NoPolicy`]. +/// +/// Holds a mutable reference to the parent policy, allowing a new [`Handle`] +/// to be inserted into the pre-located empty slot without a second lookup. +pub struct Vacant<'a> { + /// The parent policy that owns the hash table. + policy: &'a mut NoPolicy, + /// If true, means we used `.evict()` method, and empty slots are available + /// in table; so we don't need to reserve a new one. 
+ space_available: bool, +} + +impl traits::EntryExt for Vacant<'_> { + type Handle = Handle; + + fn would_exceed(&self, extra_size: usize) -> bool { + self.policy.currsize.saturating_add(extra_size) >= self.policy.maxsize.get() + } + + fn evict(&mut self) -> pyo3::PyResult { + self.policy.evict() + } +} + +impl traits::VacantExt for Vacant<'_> { + fn insert(self, handle: Self::Handle) { + self.policy.currsize = self.policy.currsize.saturating_add(handle.size); + + if !self.space_available { + self.policy.table.reserve(1, |x| x.key.hash()); + } + unsafe { + self.policy.table.insert_no_grow(handle.key.hash(), handle); + } + + self.policy.gv.increment(); + } +} + +/// A cache policy that performs **no eviction**. +/// +/// Insertions are rejected once `currsize` would exceed `maxsize`; the caller +/// must free space manually or accept the refusal. This is useful when the +/// eviction strategy is handled externally, or when a hard size cap with no +/// silent data loss is desired. +pub struct NoPolicy { + /// The raw hash table storing all live [`Handle`] entries. + table: hashbrown::raw::RawTable, + /// Hard upper bound on `currsize`. Stored as [`NonZeroUsize`](std::num::NonZeroUsize) + /// so the compiler can elide a zero-check branch in division/comparison hot paths. + maxsize: std::num::NonZeroUsize, + /// Running total of all stored handles' sizes, maintained incrementally. + currsize: usize, + /// Monotonically incrementing counter bumped on every structural mutation + /// (insert, remove, clear, shrink). Used to detect iterator invalidation. + gv: utils::GenerationVersion, + /// Callable used to measure the memory footprint of each key-value pair. + getsizeof: utils::GetsizeofFunction, +} + +impl NoPolicy { + /// Creates a new [`NoPolicy`] with the given initial `capacity` (number of slots) + /// and a `maxsize` budget limit. + /// + /// The underlying hash table is pre-allocated to hold at least `capacity` entries + /// without reallocation. 
+ #[inline] + pub fn new(capacity: usize, maxsize: usize, getsizeof: Option) -> Self { + Self { + table: hashbrown::raw::RawTable::with_capacity(capacity), + maxsize: safe_non_zero!(maxsize), + currsize: 0, + gv: utils::GenerationVersion::default(), + getsizeof: utils::GetsizeofFunction::new(getsizeof), + } + } + + /// Returns a reference to the underlying raw hash table. + pub fn table(&self) -> &hashbrown::raw::RawTable { + &self.table + } + + /// Returns a snapshot of the current [`utils::GenerationVersion`]. + /// + /// Callers can compare a saved snapshot against a later call to detect + /// whether the table was mutated in the interim. + pub fn generation_version(&self) -> utils::GenerationVersion { + self.gv.clone() + } + + /// Returns a reference to the size-measuring function used during insertion. + pub fn getsizeof(&self) -> &utils::GetsizeofFunction { + &self.getsizeof + } + + /// Makes a clone of `self`. + pub fn clone_ref(&self, py: pyo3::Python<'_>) -> Self { + let mut table = hashbrown::raw::RawTable::with_capacity(self.table.capacity()); + + unsafe { + for handle in self.table.iter().map(|x| x.as_ref()) { + table.insert_no_grow(handle.key.hash(), handle.clone_ref(py)); + } + } + + Self { + table, + maxsize: self.maxsize, + currsize: self.currsize, + gv: utils::GenerationVersion::default(), + getsizeof: self.getsizeof.clone_ref(py), + } + } +} + +impl traits::PolicyExt for NoPolicy { + type Handle = Handle; + + type Occupied<'a> + = Occupied<'a> + where + Self: 'a; + + type Vacant<'a> + = Vacant<'a> + where + Self: 'a; + + /// Returns the maximum allowed cumulative size of all stored entries. + fn maxsize(&self) -> usize { + self.maxsize.get() + } + + /// Returns the current cumulative size of all stored entries. 
+ fn current_size(&self) -> usize { + self.currsize + } + + fn get( + &mut self, + py: pyo3::Python, + key: &::Key, + ) -> pyo3::PyResult> { + let bucket = self.table.find(key.hash(), |x| key.py_eq(py, &x.key))?; + Ok(bucket.map(|x| unsafe { x.as_ref() })) + } + + fn entry( + &mut self, + py: pyo3::Python, + key: &::Key, + ) -> pyo3::PyResult, Self::Vacant<'_>>> { + match self.table.find(key.hash(), |x| key.py_eq(py, &x.key))? { + Some(bucket) => { + let result = Occupied { + policy: self, + bucket, + }; + Ok(traits::PolicyEntry::Occupied(result)) + } + None => { + let result = Vacant { + policy: self, + space_available: false, + }; + Ok(traits::PolicyEntry::Vacant(result)) + } + } + } + + fn evict(&mut self) -> pyo3::PyResult { + Err(new_py_error!( + PyNotImplementedError, + "The cache has no algorithm to evict items" + )) + } + + fn shrink_to_fit(&mut self) { + let initial = self.table.capacity(); + self.table.shrink_to(0, |x| x.key.hash()); + + if initial != self.table.capacity() { + self.gv.increment(); + } + } + + fn clear(&mut self) { + if self.table.is_empty() { + return; + } + self.table.clear(); + self.gv.increment(); + } + + fn py_eq(&self, py: pyo3::Python, other: &Self) -> pyo3::PyResult { + if self.maxsize() != other.maxsize() || self.table.len() != other.table.len() { + return Ok(false); + } + + let mut error = None; + let result = unsafe { + let mut iterator = self.table.iter().map(|x| x.as_ref()); + + iterator.all(|handle_1| { + let result = other + .table + .get(handle_1.key.hash(), |x| handle_1.key.py_eq(py, &x.key)); + + match result { + Err(e) => { + error = Some(e); + // Return false to break the `.all` loop + false + } + Ok(None) => false, + Ok(Some(handle_2)) => { + let value_1 = handle_1.value(); + let value_2 = handle_2.value(); + + match utils::pyobject_equal(py, value_1.as_ptr(), value_2.as_ptr()) { + Ok(result) => result, + Err(e) => { + error = Some(e); + // Return false to break the `.all` loop + false + } + } + } + } + }) + }; + + if 
let Some(error) = error { + return Err(error); + } + Ok(result) + } +} diff --git a/src/policies/traits.rs b/src/policies/traits.rs new file mode 100644 index 0000000..a55fb91 --- /dev/null +++ b/src/policies/traits.rs @@ -0,0 +1,120 @@ +pub trait HandleExt { + type Key; + + /// Borrows the key stored in this handle. + fn key(&self) -> &Self::Key; + + /// The size this handle contributes toward [`PolicyExt::maxsize`]. + /// + /// Return `1` for count-based policies or a byte/cost value for + /// size-based policies. Must be `> 0`. + fn size(&self) -> usize; +} + +/// Shared behaviour for occupied and vacant entry guards. +/// +/// Both variants hold a mutable borrow of the parent policy, so budget checks +/// and eviction go through the entry rather than through the policy directly. +pub trait EntryExt { + type Handle: HandleExt; + + /// Returns `true` if adding `extra_size` would meet or exceed + /// [`PolicyExt::weight_limit`]. + /// + /// Call this *before* [`OccupiedExt::replace`] or [`VacantExt::insert`]. + fn would_exceed(&self, extra_size: usize) -> bool; + + /// Evicts one entry, freeing budget for a subsequent insert or replace. + /// + /// # Errors + /// + /// Returns any Python exception raised while dropping the evicted value. + fn evict(&mut self) -> pyo3::PyResult; +} + +/// Guard for an *occupied* slot. +pub trait OccupiedExt: EntryExt { + /// Replaces the current handle with `new`, returning the old one. + /// + /// Does **not** enforce the weight budget; call + /// [`would_exceed`](EntryExt::would_exceed) first. + fn replace(self, new: Self::Handle) -> Self::Handle; + + /// Removes the handle from this slot and returns it. + fn remove(self) -> Self::Handle; +} + +/// Guard for a *vacant* slot. +pub trait VacantExt: EntryExt { + /// Inserts `handle` into this slot. + /// + /// Does **not** enforce the weight budget; call + /// [`would_exceed`](EntryExt::would_exceed) first. 
+ fn insert(self, handle: Self::Handle); +} + +/// The state of a policy slot, returned by [`PolicyExt::entry`]. +pub enum PolicyEntry { + Occupied(O), + Vacant(V), +} + +pub trait PolicyExt { + type Handle: HandleExt; + + type Occupied<'a>: OccupiedExt + 'a + where + Self: 'a; + + type Vacant<'a>: VacantExt + 'a + where + Self: 'a; + + /// Returns the configured maxsize. + fn maxsize(&self) -> usize; + + /// Returns the current total cumulative size consumed by all stored entries. + fn current_size(&self) -> usize; + + /// Looks up a handle by `hash` and `eq`, applying policy side-effects on hit. + /// + /// # Errors + /// + /// Returns `Err` if `eq` raises a Python exception. + fn get( + &mut self, + py: pyo3::Python, + key: &::Key, + ) -> pyo3::PyResult>; + + /// Returns a [`PolicyEntry`] for the slot at `hash` / `eq`. + /// + /// # Errors + /// + /// Returns `Err` if `eq` raises a Python exception. + fn entry( + &mut self, + py: pyo3::Python, + key: &::Key, + ) -> pyo3::PyResult, Self::Vacant<'_>>>; + + /// Evicts a handle according to the policy algorithm, returning it. + /// + /// # Errors + /// + /// Returns `Err` if dropping the evicted value raises a Python exception. + /// + /// # Panics + /// + /// May panic if the policy is empty. + fn evict(&mut self) -> pyo3::PyResult; + + /// Removes all handles without shrinking the allocation. + fn clear(&mut self); + + /// Shrinks the internal allocation as close to length as possible. + fn shrink_to_fit(&mut self); + + /// Performs Python `==`. 
+ fn py_eq(&self, py: pyo3::Python, other: &Self) -> pyo3::PyResult; +} diff --git a/src/policies/wrapped.rs b/src/policies/wrapped.rs new file mode 100644 index 0000000..74419d7 --- /dev/null +++ b/src/policies/wrapped.rs @@ -0,0 +1,203 @@ +use std::ops::Deref; +use std::ops::DerefMut; + +use crate::internal::alias; +use crate::policies::traits::EntryExt; +use crate::policies::traits::HandleExt; +use crate::policies::traits::OccupiedExt; +use crate::policies::traits::PolicyEntry; +use crate::policies::traits::PolicyExt; +use crate::policies::traits::VacantExt; + +/// A transparent wrapper over [`PolicyExt`] implementations that adds +/// higher-level methods shared across all policies. +/// +/// - [`insert`](Wrapped::insert) +/// - [`remove`](Wrapped::remove) +/// - [`contains`](Wrapped::contains) +/// - [`extend`](Wrapped::extend). +/// +/// Because the wrapper is `#[repr(transparent)]` and implements [`Deref`] / [`DerefMut`], +/// all methods of the inner policy `P` are directly accessible without unwrapping. +#[repr(transparent)] +pub struct Wrapped(P); + +impl Deref for Wrapped

{ + type Target = P; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for Wrapped

{ + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +impl Wrapped

{ + /// Wraps an existing policy, granting access to the shared higher-level API. + pub fn new(policy: P) -> Self { + Self(policy) + } + + /// Returns the remaining size. Equals to `maxsize - current_size`. + pub fn remaining_size(&self) -> usize { + self.maxsize().checked_sub(self.current_size()).unwrap_or(0) + } + + /// Returns `true` if the cache contains an entry for `key`. + pub fn contains( + &mut self, + py: pyo3::Python<'_>, + key: &::Key, + ) -> pyo3::PyResult { + let handle = self.0.get(py, key)?; + Ok(handle.is_some()) + } + + /// Inserts a [`Handle`](PolicyExt::Handle) into the cache, evicting entries as needed + /// to stay within the size budget before inserting. + /// + /// - If the key was already present, the old handle is replaced and returned as `Some`. + /// - If the key was absent, the handle is inserted and `None` is returned. + pub fn insert( + &mut self, + py: pyo3::Python<'_>, + handle: P::Handle, + ) -> pyo3::PyResult> { + let entry = self.0.entry(py, handle.key())?; + + match entry { + PolicyEntry::Occupied(mut occupied) => { + // Evict if need + while occupied.would_exceed(handle.size()) { + occupied.evict()?; + } + + Ok(Some(occupied.replace(handle))) + } + PolicyEntry::Vacant(mut vacant) => { + // Evict if need + while vacant.would_exceed(handle.size()) { + vacant.evict()?; + } + + vacant.insert(handle); + Ok(None) + } + } + } + + /// Removes the entry for `key` from the cache, returning its [`Handle`](PolicyExt::Handle) + /// if it was present, or `None` if the key was not found. + pub fn remove( + &mut self, + py: pyo3::Python<'_>, + key: &::Key, + ) -> pyo3::PyResult> { + let entry = self.0.entry(py, key)?; + + match entry { + PolicyEntry::Occupied(occupied) => { + let handle = occupied.remove(); + Ok(Some(handle)) + } + PolicyEntry::Vacant(_) => Ok(None), + } + } + + /// Inserts all key-value pairs from `iterable` into the cache. 
+ /// + /// `transform` converts a raw `(key, value)` Python object pair into a + /// policy-specific [`Handle`](PolicyExt::Handle) before insertion. + /// + /// # Supported iterables + /// + /// - **`dict`** — detected via a fast [`PyObject_TypeCheck`](pyo3::ffi::PyObject_TypeCheck) + /// check and iterated with [`PyDictMethods::items`](pyo3::types::PyDictMethods) to avoid + /// the overhead of a generic Python iterator. + /// - **Any object with an `.items()` method** — covers all cache classes and + /// other dict-like types; `.items()` is called and the result is iterated. + /// - **Any other iterable** — iterated directly, with each element expected to + /// unpack as a `(key, value)` pair. + /// + /// # Errors + /// + /// Returns `Err` if looking up or calling `.items()`, iterating, unpacking a + /// pair, `transform`, or the insertion itself fails. + pub fn extend( + &mut self, + iterable: alias::BoundObject, + mut transform: F, + ) -> pyo3::PyResult<()> + where + F: FnMut(alias::PyObject, alias::PyObject) -> pyo3::PyResult, + { + use pyo3::types::PyAnyMethods; + use pyo3::types::PyDictMethods; + + // Using [pyo3::ffi::PyObject_TypeCheck] and [Bound::cast_unchecked] is so faster than [Bound::cast] + let is_dictionary = unsafe { + pyo3::ffi::PyObject_TypeCheck(iterable.as_ptr(), crate::typeref::STD_DICT_TYPE) == 1 + }; + if is_dictionary { + let dict = unsafe { iterable.cast_unchecked::() }; + + for pair in dict.items() { + let (key, value) = unsafe { + pair.extract::<(alias::PyObject, alias::PyObject)>() + .unwrap_unchecked() + }; + + self.insert(pair.py(), transform(key, value)?)?; + } + + return Ok(()); + } + + // By this we will support everything has `.items()` attribute, + // including our cache classes + let items_iterable = { + if let Some(items_attribute) = iterable.getattr_opt(c"items")? { + // `getattr_opt` only fetches the attribute (a bound method); it has + // to be called to obtain the iterable of pairs. + items_attribute.call0()? + } else { + iterable + } + }; + + for pair in items_iterable.try_iter()? 
{ + let pair = pair?; + let (key, value) = pair.extract::<(alias::PyObject, alias::PyObject)>()?; + + self.insert(pair.py(), transform(key, value)?)?; + } + + Ok(()) + } + + /// Calls the `evict()` `n` times and returns count of removed items. + pub fn drain( + &mut self, + py: pyo3::Python, + n: pyo3::ffi::Py_ssize_t, + ) -> pyo3::PyResult { + if n <= 0 { + return Ok(0); + } + + let mut count: pyo3::ffi::Py_ssize_t = 0; + while count < n { + match self.0.evict() { + Ok(_) => {} + Err(err) => { + if !err.is_instance_of::(py) { + return Err(err); + } + + break; + } + } + + count += 1; + } + + Ok(count) + } +} diff --git a/src/pyclasses/base.rs b/src/pyclasses/base.rs index e69de29..7db625c 100644 --- a/src/pyclasses/base.rs +++ b/src/pyclasses/base.rs @@ -0,0 +1,62 @@ +use crate::internal::alias; + +crate::implement_pyclass! { + /// Base implementation for cache classes in the cachebox library. + /// + /// This abstract base class defines the generic structure for cache implementations, + /// supporting different key and value types through generic type parameters. + /// Serves as a foundation for specific cache variants like Cache and FIFOCache. + #[derive(Debug, Default, Clone, Copy)] + [subclass, generic, frozen] PyBaseCacheImpl as "BaseCacheImpl" ; +} +crate::implement_pyclass! { + /// Base implementation for cache classes in the cachebox library. + /// + /// This abstract base class defines the generic structure for cache implementations, + /// supporting different key and value types through generic type parameters. + /// Serves as a foundation for specific cache variants like Cache and FIFOCache. + #[derive(Debug, Default, Clone, Copy)] + [subclass, generic, frozen] PyAsyncBaseCacheImpl as "AsyncBaseCacheImpl" ; +} +crate::implement_pyclass! { + /// Base implementation for cache classes in the cachebox library. 
+ /// + /// This abstract base class defines the generic structure for cache implementations, + /// supporting different key and value types through generic type parameters. + /// Serves as a foundation for specific cache variants like Cache and FIFOCache. + #[derive(Debug, Default, Clone, Copy)] + [subclass, generic, frozen] PyBaseIteratorImpl as "BaseIteratorImpl" ; +} +crate::implement_pyclass! { + /// Base implementation for cache classes in the cachebox library. + /// + /// This abstract base class defines the generic structure for cache implementations, + /// supporting different key and value types through generic type parameters. + /// Serves as a foundation for specific cache variants like Cache and FIFOCache. + #[derive(Debug, Default, Clone, Copy)] + [subclass, generic, frozen] PyAsyncBaseIteratorImpl as "AsyncBaseIteratorImpl" ; +} + +#[pyo3::pymethods] +impl PyBaseCacheImpl { + #[new] + #[pyo3(signature = (*args, **kwargs))] + #[allow(unused_variables)] + fn __new__(args: alias::ArgsType, kwargs: Option) -> Self { + Self + } + + fn __init__(&self) {} +} + +#[pyo3::pymethods] +impl PyAsyncBaseCacheImpl { + #[new] + #[pyo3(signature = (*args, **kwargs))] + #[allow(unused_variables)] + fn __new__(args: alias::ArgsType, kwargs: Option) -> Self { + Self + } + + fn __init__(&self) {} +} diff --git a/src/pyclasses/cache/mod.rs b/src/pyclasses/cache/mod.rs new file mode 100644 index 0000000..adedb33 --- /dev/null +++ b/src/pyclasses/cache/mod.rs @@ -0,0 +1,6 @@ +mod sync; + +pub use sync::PyCache; +pub use sync::PyCacheItems; +pub use sync::PyCacheKeys; +pub use sync::PyCacheValues; diff --git a/src/pyclasses/cache/sync.rs b/src/pyclasses/cache/sync.rs new file mode 100644 index 0000000..15ec8bd --- /dev/null +++ b/src/pyclasses/cache/sync.rs @@ -0,0 +1,596 @@ +use crate::internal::alias; +use crate::internal::onceinit; +use crate::internal::utils; +use crate::policies::nopolicy; +use crate::policies::traits::HandleExt; +use crate::policies::traits::PolicyExt; 
+use crate::policies::wrapped::Wrapped; + +implement_pyclass! { + /// A thread-safe, memory-efficient key-value cache with no eviction policy. + /// items remain in the cache until manually removed or the cache is cleared. + /// + /// ## How It Works + /// `Cache` is essentially a configurable hashmap-like store. When an item is inserted: + /// - It is stored directly without any ordering, priority tracking, or access metadata. + /// - If a maximum size is configured, insertions beyond that limit are rejected (raises OverflowError). + /// A max size of zero means unlimited. + /// - All read and write operations are thread-safe, making it safe for concurrent access without + /// external locking. + /// + /// Because no eviction logic runs in the background, there is no overhead from tracking usage order, + /// frequency counters, or expiry timestamps. + /// + /// ### Pros + /// - Minimal overhead - no bookkeeping for eviction means lower CPU and memory usage per entry compared + /// to policy-based caches. + /// - Predictable behavior - items are never silently removed, so cache hits are deterministic once an + /// item is stored. + /// - Thread-safe - safe for concurrent reads and writes out of the box. + /// - Configurable capacity - a hard size limit prevents unbounded memory growth. + /// + /// ### Cons + /// - No automatic eviction - the cache can fill up and stop accepting new entries if a max size is set, + /// requiring manual management. + /// - Unordered - unlike a standard dict (Python 3.7+), insertion order is not preserved. + /// - Not suitable for volatile data - stale entries persist forever unless explicitly invalidated. + /// + /// ## When to Use It + /// `Cache` is the right choice when: + /// - You have a fixed, well-known set of keys that are expensive to compute and never go stale + /// (e.g., parsed config values, compiled regex patterns, loaded templates). 
+ /// - The cached data has no meaningful expiry - it's either always valid or always explicitly invalidated. + /// - You need the lowest possible overhead and can guarantee the cache won't grow uncontrollably. + /// + /// Avoid it when cached data can become stale, when the working set is unpredictable in size, or when you need automatic + /// memory pressure relief. + [subclass, extends=crate::pyclasses::base::PyBaseCacheImpl, generic, frozen] + PyCache as "Cache" (onceinit::OnceInit>); +} + +#[pyo3::pymethods] +impl PyCache { + #[new] + #[allow(unused_variables)] + #[pyo3(signature=(*args, **kwds))] + fn __new__( + args: alias::ArgsType, + kwds: Option, + ) -> (Self, crate::pyclasses::base::PyBaseCacheImpl) { + ( + Self(onceinit::OnceInit::uninit()), + crate::pyclasses::base::PyBaseCacheImpl, + ) + } + + /// Initialize a new Cache instance. + /// + /// Args: + /// maxsize: Maximum number of elements the cache can hold. Zero means unlimited. + /// iterable: Initial data to populate the cache. + /// capacity: Pre-allocate hash table capacity to minimize reallocations. Defaults to 0. + /// getsizeof: A callable that computes the size of a key-value pair. When `None`, each + /// entry is assumed to have a size of 1 (equivalent to `lambda k, v: 1`). + /// Use this to implement weighted caching — for example, sizing entries by + /// memory footprint or byte length. + /// + /// The cache can be pre-sized via `capacity` to reduce hash table reallocations when + /// the number of expected entries is known ahead of time. 
+ #[pyo3(signature=(maxsize, iterable=None, *, capacity=0, getsizeof=None))] + fn __init__( + &self, + py: pyo3::Python, + maxsize: usize, + iterable: Option, + capacity: usize, + getsizeof: Option, + ) -> pyo3::PyResult<()> { + let mut wrapped = Wrapped::new(nopolicy::NoPolicy::new(capacity, maxsize, getsizeof)); + + if let Some(iterable) = iterable { + let getsizeof = wrapped.getsizeof().clone_ref(py); + + let result = wrapped.extend( + // iterable object + iterable, + // transform function + |key, value| nopolicy::Handle::new(py, &getsizeof, key, value), + ); + self.0.set(wrapped); + result + } else { + self.0.set(wrapped); + Ok(()) + } + } + + /// Returns the number of entries currently in the cache. + fn __len__(&self) -> usize { + let lock = self.0.lock(); + lock.table().len() + } + + fn __sizeof__(&self) -> usize { + let lock = self.0.lock(); + lock.table().capacity() * std::mem::size_of::() + } + + fn __bool__(&self) -> bool { + let lock = self.0.lock(); + lock.table().len() > 0 + } + + fn __contains__(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult { + self.contains(py, key) + } + + /// Returns the specified `maxsize` + fn maxsize(&self) -> usize { + let lock = self.0.lock(); + lock.maxsize() + } + + /// Returns the current total cumulative size consumed by all stored entries. + fn current_size(&self) -> usize { + let lock = self.0.lock(); + lock.current_size() + } + + /// Returns the remaining size. Equals to `maxsize - current_size` + fn remaining_size(&self) -> usize { + let lock = self.0.lock(); + lock.remaining_size() + } + + /// Returns the `getsizeof` function + fn getsizeof(&self, py: pyo3::Python) -> Option { + let lock = self.0.lock(); + lock.getsizeof().clone_ref(py).into() + } + + /// Returns the number of elements the map can hold without reallocating. + fn capacity(&self) -> usize { + let lock = self.0.lock(); + lock.table().capacity() + } + + /// Returns the number of entries currently in the cache. 
+ fn len(&self) -> usize { + let lock = self.0.lock(); + lock.table().len() + } + + /// Returns `true` if the cache contains an entry for `key`. + fn contains(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let mut lock = self.0.lock(); + lock.contains(py, &key) + } + + /// Returns `True` if cache is empty. + fn is_empty(&self) -> bool { + let lock = self.0.lock(); + lock.table().len() == 0 + } + + /// Returns `True` when the cumulative size has reached the maxsize limit. + fn is_full(&self) -> bool { + let lock = self.0.lock(); + lock.current_size() >= lock.maxsize() + } + + /// Equals to `self[key] = value`, but returns a value: + /// + /// - If the cache did not have this key present, None is returned. + /// - If the cache did have this key present, the value is updated, + /// and the old value is returned. The key is not updated, though; + /// + /// Note: raises `OverflowError` if the cache reached the maxsize limit, + /// because this class does not have any algorithm. + fn insert( + &self, + py: pyo3::Python, + key: alias::PyObject, + value: alias::PyObject, + ) -> pyo3::PyResult> { + let mut lock = self.0.lock(); + let handle = nopolicy::Handle::new(py, lock.getsizeof(), key, value)?; + + let old_handle = lock.insert(py, handle)?.map(|x| x.into_value()); + Ok(old_handle) + } + + /// Updates the cache with elements from a dictionary or an iterable object of key/value pairs. 
+ fn update( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + iterable: alias::PyObject, + ) -> pyo3::PyResult<()> { + if std::ptr::eq(slf.as_ptr(), iterable.as_ptr()) { + return Ok(()); + } + + let mut lock = slf.0.lock(); + let getsizeof = lock.getsizeof().clone_ref(py); + + lock.extend( + // iterable object + iterable.into_bound(py), + // transform function + move |key, value| nopolicy::Handle::new(py, &getsizeof, key, value), + ) + } + + fn __setitem__( + &self, + py: pyo3::Python, + key: alias::PyObject, + value: alias::PyObject, + ) -> pyo3::PyResult<()> { + let mut lock = self.0.lock(); + let handle = nopolicy::Handle::new(py, lock.getsizeof(), key, value)?; + + lock.insert(py, handle)?; + Ok(()) + } + + /// Retrieves the value for a given key from the cache. + /// + /// Returns the value associated with the key if present, otherwise returns the specified default value. + /// Equivalent to `self[key]`, but provides a fallback default if the key is not found. + /// + /// Args: + /// key: The key to look up in the cache. + /// default: The value to return if the key is not present in the cache. Defaults to None. + /// + /// Returns: + /// The value associated with the key, or the default value if the key is not found. + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn get<'p>( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument<'p>, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let mut lock = self.0.lock(); + + if let Some(x) = lock.get(py, &key)? 
{ + return Ok(x.value().clone_ref(py)); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok(x.unbind()), + utils::OptionalArgument::Undefined => unsafe { + // SAFETY: None is immortal, so reference counting has no meaning + Ok(pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind()) + }, + } + } + + fn __getitem__( + &self, + py: pyo3::Python, + key: alias::PyObject, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let mut lock = self.0.lock(); + match lock.get(py, &key)? { + Some(x) => Ok(x.value().clone_ref(py)), + None => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + /// Inserts key with a value of default if key is not in the cache. + /// + /// Returns the value for key if key is in the cache, else default. + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn setdefault( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult { + // 1. Try to get value + // 2. If exists -> return it + // 3. Else -> insert default -> return default + let key = utils::PrecomputedHashObject::new(py, key)?; + + let mut lock = self.0.lock(); + if let Some(x) = lock.get(py, &key)? { + return Ok(x.value().clone_ref(py)); + } + + let default_object = match default { + utils::OptionalArgument::Defined(x) => x.unbind(), + utils::OptionalArgument::Undefined => unsafe { + // SAFETY: None is immortal, so reference counting has no meaning + pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind() + }, + }; + + let handle = nopolicy::Handle::with_precomputed_hash_key( + py, + lock.getsizeof(), + key, + default_object.clone_ref(py), + )?; + + lock.insert(py, handle)?; + Ok(default_object) + } + + /// Removes specified key and returns the corresponding value. + /// + /// If the key is not found, returns the `default` if given; otherwise, raise a KeyError. 
+ #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn pop( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let mut lock = self.0.lock(); + if let Some(x) = lock.remove(py, &key)? { + return Ok(x.into_value()); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok(x.unbind()), + utils::OptionalArgument::Undefined => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + fn __delitem__(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult<()> { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let mut lock = self.0.lock(); + match lock.remove(py, &key)? { + Some(_) => Ok(()), + None => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + /// Remove and return a (key, value) pair as a 2-tuple. + /// + /// NOTE: `Cache` always raises `OverflowError` because it has neither policy nor algorithm to evict items. + fn popitem(&self) -> pyo3::PyResult<(alias::PyObject, alias::PyObject)> { + let mut lock = self.0.lock(); + let handle = lock.evict()?; + drop(lock); + + let (key, val) = handle.into_pair(); + Ok((key.into(), val)) + } + + /// Calls the `popitem()` `n` times and returns count of removed items. + fn drain( + &self, + py: pyo3::Python, + n: pyo3::ffi::Py_ssize_t, + ) -> pyo3::PyResult { + let mut lock = self.0.lock(); + lock.drain(py, n) + } + + /// Shrinks the internal allocation as close to the current length as possible. + fn shrink_to_fit(&self) { + self.0.lock().shrink_to_fit(); + } + + /// Removes all entries from the table and resets the cumulative size to zero. 
+ #[pyo3(signature=(*, reuse=false))] + fn clear(&self, reuse: bool) { + let mut lock = self.0.lock(); + lock.clear(); + + if !reuse { + lock.shrink_to_fit(); + } + } + + fn __eq__( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + other: pyo3::PyRef<'_, Self>, + ) -> pyo3::PyResult { + if std::ptr::eq(slf.as_ptr(), other.as_ptr()) { + return Ok(true); + } + + let self_lock = slf.0.lock(); + let other_lock = other.0.lock(); + + self_lock.py_eq(py, &*other_lock) + } + + fn __ne__( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + other: pyo3::PyRef<'_, Self>, + ) -> pyo3::PyResult { + if std::ptr::eq(slf.as_ptr(), other.as_ptr()) { + return Ok(false); + } + + let self_lock = slf.0.lock(); + let other_lock = other.0.lock(); + + self_lock.py_eq(py, &*other_lock).map(|x| !x) + } + + fn items(&self, py: pyo3::Python) -> pyo3::PyResult> { + let lock = self.0.lock(); + let gv = lock.generation_version(); + let initial_gv = gv.get(); + + // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] + let result = PyCacheItems { + iter: parking_lot::Mutex::new(unsafe { lock.table().iter() }), + gv, + initial_gv, + }; + pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + } + + fn values(&self, py: pyo3::Python) -> pyo3::PyResult> { + let lock = self.0.lock(); + let gv = lock.generation_version(); + let initial_gv = gv.get(); + + // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] + let result = PyCacheValues { + iter: parking_lot::Mutex::new(unsafe { lock.table().iter() }), + gv, + initial_gv, + }; + pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + } + + fn keys(&self, py: pyo3::Python) -> pyo3::PyResult> { + let lock = self.0.lock(); + let gv = lock.generation_version(); + let initial_gv = gv.get(); + + // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] + let result = PyCacheKeys { + iter: 
parking_lot::Mutex::new(unsafe { lock.table().iter() }), + gv, + initial_gv, + }; + pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + } + + fn __iter__(&self, py: pyo3::Python) -> pyo3::PyResult> { + self.keys(py) + } + + fn copy(&self, py: pyo3::Python) -> pyo3::PyResult> { + let lock = self.0.lock(); + let cloned = lock.clone_ref(py); + let result = Self(onceinit::OnceInit::new(Wrapped::new(cloned))); + + pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseCacheImpl)) + } + + fn __copy__(&self, py: pyo3::Python) -> pyo3::PyResult> { + self.copy(py) + } + + fn __repr__(slf: pyo3::PyRef<'_, Self>, py: pyo3::Python) -> String { + let lock = slf.0.lock(); + + let iter = unsafe { + lock.table() + .iter() + .map(|bucket| bucket.as_ref()) + .map(|handle| { + ( + // Without using `.bind` it returns something like `Py(addr)` + handle.key().as_ref().bind(py), + handle.value().bind(py), + ) + }) + }; + + let items = utils::items_to_str(iter, lock.table().len()).unwrap(); + format!( + "{}[{}/{}]({})", + utils::get_type_name(py, slf.as_ptr()), + lock.current_size(), + lock.maxsize(), + items + ) + } + + fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { + let lock = self.0.lock(); + + for handle_ref in unsafe { lock.table().iter() } { + let handle = unsafe { handle_ref.as_ref() }; + + visit.call(handle.key().as_ref())?; + visit.call(handle.value())?; + } + Ok(()) + } + + fn __clear__(&self) { + self.0.lock().clear(); + } +} + +// Implement iterators +macro_rules! implement_iterator { + ( + $( + $name:ident as $pyname:literal + fn ($py:ident, $handle:ident) -> $rt_type:ty { $init:expr } + )+ + ) => { + $( + implement_pyclass! 
{ + [extends=crate::pyclasses::base::PyBaseIteratorImpl, generic, frozen] + $name as $pyname { + initial_gv: u32, + gv: utils::GenerationVersion, + iter: parking_lot::Mutex>, + } + } + + #[pyo3::pymethods] + impl $name { + fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { + slf + } + + fn __next__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult<$rt_type> { + if slf.initial_gv != slf.gv.get() { + return Err(new_py_error!( + PyRuntimeError, + "cache size changed during iteration" + )); + } + + let mut iter = slf.iter.lock(); + + match iter.next() { + Some(x) => { + let $py = slf.py(); + let $handle = unsafe { x.as_ref() }; + Ok($init) + } + None => return Err(new_py_error!(PyStopIteration, ())), + } + } + } + )+ + }; +} +implement_iterator!( + PyCacheItems as "cache_items" + fn(py, handle) -> (alias::PyObject, alias::PyObject) {{ + let (key, val) = handle.clone_ref(py).into_pair(); + (key.into(), val) + }} + + PyCacheKeys as "cache_keys" + fn(py, handle) -> alias::PyObject { handle.key().clone_ref(py).into() } + + PyCacheValues as "cache_values" + fn(py, handle) -> alias::PyObject { handle.value().clone_ref(py) } +); diff --git a/src/pyclasses/mod.rs b/src/pyclasses/mod.rs index e69de29..92c1625 100644 --- a/src/pyclasses/mod.rs +++ b/src/pyclasses/mod.rs @@ -0,0 +1,2 @@ +pub mod base; +pub mod cache; diff --git a/src/typeref.rs b/src/typeref.rs new file mode 100644 index 0000000..0808aa2 --- /dev/null +++ b/src/typeref.rs @@ -0,0 +1,23 @@ +pub static mut STD_DICT_TYPE: *mut pyo3::ffi::PyTypeObject = std::ptr::null_mut(); +pub static mut STD_TUPLE_TYPE: *mut pyo3::ffi::PyTypeObject = std::ptr::null_mut(); + +unsafe fn get_type_object_for( + py: pyo3::Python, +) -> *mut pyo3::ffi::PyTypeObject { + T::type_object_raw(py) +} + +#[cold] +#[optimize(size)] +fn _initialize_typeref(py: pyo3::Python) { + unsafe { + STD_DICT_TYPE = get_type_object_for::(py); + STD_TUPLE_TYPE = get_type_object_for::(py); + } +} + +pub fn initialize_typeref(py: pyo3::Python) { + 
static INIT: std::sync::Once = std::sync::Once::new(); + + INIT.call_once(|| _initialize_typeref(py)); +} From aaa2feb9c93fc573e86fd65246faa2532cc6e676 Mon Sep 17 00:00:00 2001 From: awolverp Date: Mon, 18 May 2026 20:08:24 +0330 Subject: [PATCH 04/60] Update Cargo.lock --- Cargo.lock | 66 ------------------------------------------------------ 1 file changed, 66 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a4e2019..3ffed3e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -15,7 +15,6 @@ dependencies = [ "cfg-if", "parking_lot", "pyo3", - "pyo3-async-runtimes", "pyo3-build-config", "tokio", ] @@ -42,51 +41,6 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" -[[package]] -name = "futures-channel" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" -dependencies = [ - "futures-core", -] - -[[package]] -name = "futures-core" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" - -[[package]] -name = "futures-macro" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "futures-task" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" - -[[package]] -name = "futures-util" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" -dependencies = [ - "futures-core", - "futures-macro", - "futures-task", - "pin-project-lite", - 
"slab", -] - [[package]] name = "heck" version = "0.5.0" @@ -182,20 +136,6 @@ dependencies = [ "pyo3-macros", ] -[[package]] -name = "pyo3-async-runtimes" -version = "0.28.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e7364a95bf00e8377bbf9b0f09d7ff9715a29d8fcf93b47d1a967363b973178" -dependencies = [ - "futures-channel", - "futures-util", - "once_cell", - "pin-project-lite", - "pyo3", - "tokio", -] - [[package]] name = "pyo3-build-config" version = "0.28.3" @@ -286,12 +226,6 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" -[[package]] -name = "slab" -version = "0.4.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" - [[package]] name = "smallvec" version = "1.15.1" From c848ac0a6aefd7a07237b19e20817258f026a7e4 Mon Sep 17 00:00:00 2001 From: awolverp Date: Mon, 18 May 2026 20:10:00 +0330 Subject: [PATCH 05/60] Update exports --- src/lib.rs | 4 ---- src/pyclasses/base.rs | 30 ------------------------------ 2 files changed, 34 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index f7d2d99..2fd4769 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -16,10 +16,6 @@ pub mod pyclasses; mod _core { use crate::typeref; - #[pymodule_export] - use crate::pyclasses::base::PyAsyncBaseCacheImpl; - #[pymodule_export] - use crate::pyclasses::base::PyAsyncBaseIteratorImpl; #[pymodule_export] use crate::pyclasses::base::PyBaseCacheImpl; #[pymodule_export] diff --git a/src/pyclasses/base.rs b/src/pyclasses/base.rs index 7db625c..43a8cf7 100644 --- a/src/pyclasses/base.rs +++ b/src/pyclasses/base.rs @@ -9,15 +9,6 @@ crate::implement_pyclass! { #[derive(Debug, Default, Clone, Copy)] [subclass, generic, frozen] PyBaseCacheImpl as "BaseCacheImpl" ; } -crate::implement_pyclass! { - /// Base implementation for cache classes in the cachebox library. 
- /// - /// This abstract base class defines the generic structure for cache implementations, - /// supporting different key and value types through generic type parameters. - /// Serves as a foundation for specific cache variants like Cache and FIFOCache. - #[derive(Debug, Default, Clone, Copy)] - [subclass, generic, frozen] PyAsyncBaseCacheImpl as "AsyncBaseCacheImpl" ; -} crate::implement_pyclass! { /// Base implementation for cache classes in the cachebox library. /// @@ -27,15 +18,6 @@ crate::implement_pyclass! { #[derive(Debug, Default, Clone, Copy)] [subclass, generic, frozen] PyBaseIteratorImpl as "BaseIteratorImpl" ; } -crate::implement_pyclass! { - /// Base implementation for cache classes in the cachebox library. - /// - /// This abstract base class defines the generic structure for cache implementations, - /// supporting different key and value types through generic type parameters. - /// Serves as a foundation for specific cache variants like Cache and FIFOCache. - #[derive(Debug, Default, Clone, Copy)] - [subclass, generic, frozen] PyAsyncBaseIteratorImpl as "AsyncBaseIteratorImpl" ; -} #[pyo3::pymethods] impl PyBaseCacheImpl { @@ -48,15 +30,3 @@ impl PyBaseCacheImpl { fn __init__(&self) {} } - -#[pyo3::pymethods] -impl PyAsyncBaseCacheImpl { - #[new] - #[pyo3(signature = (*args, **kwargs))] - #[allow(unused_variables)] - fn __new__(args: alias::ArgsType, kwargs: Option) -> Self { - Self - } - - fn __init__(&self) {} -} From 6114962abc3fddd6e7a6dc2dedad4bdccb02c386 Mon Sep 17 00:00:00 2001 From: awolverp Date: Tue, 19 May 2026 11:09:19 +0330 Subject: [PATCH 06/60] Test Cache, Fix a bug --- cachebox/__init__.py | 3 +- cachebox/_core.pyi | 39 +- pyproject.toml | 3 - requirements-dev.txt | 2 + src/policies/nopolicy.rs | 6 +- src/policies/wrapped.rs | 2 +- src/pyclasses/{cache/sync.rs => cache.rs} | 0 src/pyclasses/cache/mod.rs | 6 - tests/__init__.py | 0 tests/cache.py | 78 +++ tests/mixins.py | 628 ++++++++++++++++++++++ 11 files changed, 726 
insertions(+), 41 deletions(-) create mode 100644 requirements-dev.txt rename src/pyclasses/{cache/sync.rs => cache.rs} (100%) delete mode 100644 src/pyclasses/cache/mod.rs create mode 100644 tests/__init__.py create mode 100644 tests/cache.py create mode 100644 tests/mixins.py diff --git a/cachebox/__init__.py b/cachebox/__init__.py index fa5bd09..7e80a87 100644 --- a/cachebox/__init__.py +++ b/cachebox/__init__.py @@ -1 +1,2 @@ -from ._core import Cache +from ._core import BaseCacheImpl as BaseCacheImpl +from ._core import Cache as Cache diff --git a/cachebox/_core.pyi b/cachebox/_core.pyi index 86aac17..aae7911 100644 --- a/cachebox/_core.pyi +++ b/cachebox/_core.pyi @@ -2,6 +2,13 @@ import typing from _typeshed import SupportsItems +_IterableType: typing.TypeAlias = ( + typing.Dict[KT, VT] + | SupportsItems[KT, VT] + | BaseCacheImpl[KT, VT] + | typing.Iterable[typing.Tuple[KT, VT]] +) + KT = typing.TypeVar("KT") VT = typing.TypeVar("VT") DT = typing.TypeVar("DT") @@ -18,12 +25,7 @@ class BaseCacheImpl(typing.Generic[KT, VT]): def __init__( self, maxsize: int, - iterable: ( - typing.Dict[KT, VT] - | SupportsItems[KT, VT] - | typing.Iterable[typing.Tuple[KT, VT]] - | None - ) = None, + iterable: _IterableType[KT, VT] | None = None, *, capacity: int = 0, getsizeof: typing.Callable[[KT, VT]] | None = None, @@ -46,11 +48,7 @@ class BaseCacheImpl(typing.Generic[KT, VT]): def __setitem__(self, key: KT, value: VT) -> None: ... def update( self, - iterable: ( - typing.Dict[KT, VT] - | SupportsItems[KT, VT] - | typing.Iterable[typing.Tuple[KT, VT]] - ), + iterable: _IterableType[KT, VT], *args: typing.Any, **kwargs: typing.Any, ) -> None: ... @@ -90,7 +88,6 @@ class Cache(BaseCacheImpl[KT, VT]): `Cache` is essentially a configurable hashmap-like store. When an item is inserted: - It is stored directly without any ordering, priority tracking, or access metadata. - If a maximum size is configured, insertions beyond that limit are rejected (raises OverflowError). 
- A max size of zero means unlimited. - All read and write operations are thread-safe, making it safe for concurrent access without external locking. @@ -125,12 +122,7 @@ class Cache(BaseCacheImpl[KT, VT]): def __init__( self, maxsize: int, - iterable: ( - typing.Dict[KT, VT] - | SupportsItems[KT, VT] - | typing.Iterable[typing.Tuple[KT, VT]] - | None - ) = ..., + iterable: _IterableType[KT, VT] | None = None, *, capacity: int = ..., getsizeof: typing.Callable[[KT, VT]] | None = ..., @@ -207,14 +199,7 @@ class Cache(BaseCacheImpl[KT, VT]): """ ... - def update( - self, - iterable: ( - typing.Dict[KT, VT] - | SupportsItems[KT, VT] - | typing.Iterable[typing.Tuple[KT, VT]] - ), - ) -> None: + def update(self, iterable: _IterableType[KT, VT]) -> None: """ Updates the cache with elements from a dictionary or an iterable object of key/value pairs. """ @@ -263,7 +248,7 @@ class Cache(BaseCacheImpl[KT, VT]): ... def popitem(self) -> typing.Tuple[KT, VT]: - """Always raises `NotImplementedError` because `Cache` has neither policy nor algorithm to evict items.""" + """Always raises `OverflowError` because `Cache` has neither policy nor algorithm to evict items.""" ... 
def drain(self, n: int) -> int: diff --git a/pyproject.toml b/pyproject.toml index 9200205..0b3db93 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,9 +42,6 @@ Homepage = 'https://github.com/awolverp/cachebox' [project.optional-dependencies] -[tool.pytest.ini_options] -asyncio_default_fixture_loop_scope = "function" - [tool.maturin] features = ["pyo3/extension-module"] module-name = "cachebox._core" diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..9b9e5f9 --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,2 @@ +pytest +hypothesis diff --git a/src/policies/nopolicy.rs b/src/policies/nopolicy.rs index f7aa25a..9875592 100644 --- a/src/policies/nopolicy.rs +++ b/src/policies/nopolicy.rs @@ -115,7 +115,7 @@ impl traits::EntryExt for Occupied<'_> { .currsize .saturating_add(extra_size) .saturating_sub(handle.size) - >= self.policy.maxsize.get() + > self.policy.maxsize.get() } fn evict(&mut self) -> pyo3::PyResult { @@ -155,7 +155,7 @@ impl traits::EntryExt for Vacant<'_> { type Handle = Handle; fn would_exceed(&self, extra_size: usize) -> bool { - self.policy.currsize.saturating_add(extra_size) >= self.policy.maxsize.get() + self.policy.currsize.saturating_add(extra_size) > self.policy.maxsize.get() } fn evict(&mut self) -> pyo3::PyResult { @@ -311,7 +311,7 @@ impl traits::PolicyExt for NoPolicy { fn evict(&mut self) -> pyo3::PyResult { Err(new_py_error!( - PyNotImplementedError, + PyOverflowError, "The cache has no algorithm to evict items" )) } diff --git a/src/policies/wrapped.rs b/src/policies/wrapped.rs index 74419d7..b443d02 100644 --- a/src/policies/wrapped.rs +++ b/src/policies/wrapped.rs @@ -156,7 +156,7 @@ impl Wrapped

{ // including our cache classes let items_iterable = { if let Some(items_attribute) = iterable.getattr_opt(c"items")? { - items_attribute + items_attribute.call0()? } else { iterable } diff --git a/src/pyclasses/cache/sync.rs b/src/pyclasses/cache.rs similarity index 100% rename from src/pyclasses/cache/sync.rs rename to src/pyclasses/cache.rs diff --git a/src/pyclasses/cache/mod.rs b/src/pyclasses/cache/mod.rs deleted file mode 100644 index adedb33..0000000 --- a/src/pyclasses/cache/mod.rs +++ /dev/null @@ -1,6 +0,0 @@ -mod sync; - -pub use sync::PyCache; -pub use sync::PyCacheItems; -pub use sync::PyCacheKeys; -pub use sync::PyCacheValues; diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/cache.py b/tests/cache.py new file mode 100644 index 0000000..e52db21 --- /dev/null +++ b/tests/cache.py @@ -0,0 +1,78 @@ +import typing + +import pytest + +import cachebox + +from . import mixins + + +class TestCache( + mixins.InitializeMixin, + mixins.InsertAndGetMixin, + mixins.SetDefaultMixin, + mixins.PopAndDeleteMixin, + mixins.UpdateMixin, + mixins.IntrospectionMixin, + mixins.IterationMixin, + mixins.DrainClearShrinkMixin, + mixins.CopyMixin, + mixins.GetSizeOfMixin, + mixins.EdgeCasesMixin, + mixins.IssuesMixin, + mixins.FuzzyMixin, +): + def create_cache( + self, + maxsize: int = 10, + iterable: typing.Any = None, + capacity: int = 0, + getsizeof: typing.Any = None, + ) -> cachebox.BaseCacheImpl: + return cachebox.Cache(maxsize, iterable, capacity=capacity, getsizeof=getsizeof) + + def test_popitem_overflow_error(self): + cache = self.create_cache() + + # cachebox.Cache does not have any algorithm to use + with pytest.raises(OverflowError): + cache.popitem() + + def test_insert_overflow_error(self): + cache = self.create_cache(5) + + for i in range(5): + cache.insert(i, i) + + with pytest.raises(OverflowError): + cache.insert(6, 6) + + cache.insert(4, "A") # <- Replacing should be OK + + # Try again with 
custom getsizeof + cache = self.create_cache(5, getsizeof=lambda k, v: len(k)) + cache.insert("AA", 1) + cache.insert("BBB", 1) # <- Now is full + + assert cache.is_full() + + with pytest.raises(OverflowError): + cache.insert("NEW", 1) + + cache.insert("AA", "A") # <- Replacing should be OK + + def test_update_overflow_error(self): + with pytest.raises(OverflowError): + self.create_cache(5, {i: i for i in range(6)}) + + cache = self.create_cache(5) + cache.update({i: i for i in range(5)}) # <- Now is full + + with pytest.raises(OverflowError): + cache.insert(6, 6) + + with pytest.raises(OverflowError): + cache.update({10: 10}) + + # Replacing should be OK + cache.update({i: i for i in range(5)}) diff --git a/tests/mixins.py b/tests/mixins.py new file mode 100644 index 0000000..54fd280 --- /dev/null +++ b/tests/mixins.py @@ -0,0 +1,628 @@ +import dataclasses +import sys +import typing + +import pytest +from hypothesis import assume, given +from hypothesis import strategies as st + +import cachebox + +# Strategy for keys that are hashable (str, int, tuple of ints) +hashable_keys = st.one_of( + st.text(), + st.integers(), + st.tuples(st.integers(), st.integers()), +) + +# Strategy for arbitrary values +any_value = st.one_of( + st.none(), + st.booleans(), + st.integers(), + st.floats(allow_nan=False), + st.text(), + st.binary(), + st.lists(st.integers(), max_size=5), +) + + +class BaseMixin: + def create_cache( + self, + maxsize: int = 10, + iterable: typing.Any = None, + capacity: int = 0, + getsizeof: typing.Any = None, + ) -> cachebox.BaseCacheImpl: + raise NotImplementedError + + +class InitializeMixin(BaseMixin): + def test_empty_on_creation(self): + cache = self.create_cache() + assert len(cache) == 0 + + def test_maxsize_stored(self): + cache = self.create_cache() + assert cache.maxsize() == 10 + + def test_maxsize_zero_means_unlimited(self): + cache = self.create_cache(0) + assert cache.maxsize() == sys.maxsize + + def test_init_from_dict(self): + c = 
self.create_cache(maxsize=10, iterable={"a": 1, "b": 2}) + assert c.get("a") == 1 + assert c.get("b") == 2 + assert len(c) == 2 + + def test_init_from_list_of_tuples(self): + c = self.create_cache(maxsize=10, iterable=[("x", 10), ("y", 20)]) + assert c.get("x") == 10 + assert c.get("y") == 20 + + def test_init_from_other_cache(self): + iterable = self.create_cache(maxsize=10, iterable=[("x", 10), ("y", 20)]) + + c = self.create_cache(maxsize=10, iterable=iterable) + assert c.get("x") == 10 + assert c.get("y") == 20 + + def test_capacity_param(self): + c = self.create_cache(maxsize=10, capacity=10) + assert c.capacity() >= 10 + + def test_getsizeof_stored(self): + sizer = lambda k, v: len(v) # noqa: E731 + + c = self.create_cache(maxsize=100, getsizeof=sizer) + assert c.getsizeof() is sizer + + +class InsertAndGetMixin(BaseMixin): + def test_insert_returns_none_on_new_key(self): + cache = self.create_cache() + + result = cache.insert("k", "v") + assert result is None + + def test_insert_returns_old_value_on_update(self): + cache = self.create_cache() + + cache.insert("k", "v1") + result = cache.insert("k", "v2") + assert result == "v1" + + def test_get_existing_key(self): + cache = self.create_cache() + + cache.insert("k", 42) + assert cache.get("k") == 42 + + def test_get_missing_key_returns_none(self): + cache = self.create_cache() + + assert cache.get("nope") is None + + def test_get_missing_key_returns_custom_default(self): + cache = self.create_cache() + + assert cache.get("nope", "fallback") == "fallback" + + def test_setitem_getitem(self): + cache = self.create_cache() + + cache["k"] = "v" + assert cache["k"] == "v" + + def test_getitem_missing_raises_keyerror(self): + cache = self.create_cache() + + with pytest.raises(KeyError): + _ = cache["ghost"] + + def test_none_value_stored_correctly(self): + cache = self.create_cache() + + cache.insert("k", None) + # None value is present — default should NOT be returned + assert cache.get("k", "MISS") is None + + def 
test_overwrite_keeps_len_unchanged(self): + cache = self.create_cache() + + cache.insert("k", 1) + cache.insert("k", 2) + assert len(cache) == 1 + + def test_insert_get_raw_type(self): + class AType: + pass + + cache = self.create_cache() + cache[AType] = AType + assert cache[AType] is AType + + +class SetDefaultMixin(BaseMixin): + def test_setdefault_inserts_when_absent(self): + cache = self.create_cache() + + result = cache.setdefault("k", "default") + assert result == "default" + assert cache.get("k") == "default" + + def test_setdefault_returns_existing_value(self): + cache = self.create_cache() + + cache.insert("k", "existing") + result = cache.setdefault("k", "default") + assert result == "existing" + assert cache.get("k") == "existing" + + +class PopAndDeleteMixin(BaseMixin): + def test_pop_existing_key(self): + cache = self.create_cache() + + cache.insert("k", "v") + result = cache.pop("k") + assert result == "v" + assert cache.get("k") is None + + def test_pop_missing_key_with_default(self): + cache = self.create_cache() + + assert cache.pop("ghost", "default") == "default" + + def test_pop_missing_key_raises_keyerror(self): + cache = self.create_cache() + + with pytest.raises(KeyError): + cache.pop("ghost") + + def test_delitem_existing_key(self): + cache = self.create_cache() + + cache["k"] = "v" + del cache["k"] + assert cache.get("k") is None + + def test_delitem_missing_key_raises_keyerror(self): + cache = self.create_cache() + + with pytest.raises(KeyError): + del cache["ghost"] + + +class UpdateMixin(BaseMixin): + def test_update_from_dict(self): + cache = self.create_cache() + + cache.update({"a": 1, "b": 2}) + assert cache.get("a") == 1 + assert cache.get("b") == 2 + + def test_update_from_other(self): + iterable = self.create_cache(10, ((str(i), i) for i in range(10))) + cache = self.create_cache() + + cache.update(iterable) + for i in range(10): + assert cache.get(str(i)) == i + + def test_update_from_list_of_tuples(self): + cache = 
self.create_cache() + + cache.update([("x", 10), ("y", 20)]) + assert cache.get("x") == 10 + assert cache.get("y") == 20 + + def test_update_overwrites_existing(self): + cache = self.create_cache() + + cache.insert("a", 1) + cache.update({"a": 99}) + assert cache.get("a") == 99 + + +class IntrospectionMixin(BaseMixin): + def test_len_reflects_insertions(self): + cache = self.create_cache() + + assert len(cache) == 0 + cache.insert("a", 1) + assert len(cache) == 1 + cache.insert("b", 2) + assert len(cache) == 2 + + def test_current_size_equals_len_without_getsizeof(self): + cache = self.create_cache() + + cache.insert("a", 1) + cache.insert("b", 2) + assert cache.current_size() == len(cache) + + def test_remaining_size(self): + cache = self.create_cache() + + cache.insert("a", 1) + assert cache.remaining_size() == cache.maxsize() - cache.current_size() + + def test_is_empty_on_new_cache(self): + cache = self.create_cache() + + assert cache.is_empty() + + def test_is_not_empty_after_insert(self): + cache = self.create_cache() + + cache.insert("k", "v") + assert not cache.is_empty() + + def test_bool_false_when_empty(self): + cache = self.create_cache() + + assert not bool(cache) + + def test_bool_true_when_not_empty(self): + cache = self.create_cache() + + cache.insert("k", "v") + assert bool(cache) + + def test_contains_operator(self): + cache = self.create_cache() + + cache.insert("k", "v") + assert "k" in cache + assert "ghost" not in cache + + def test_contains_method(self): + cache = self.create_cache() + + cache.insert("k", "v") + assert cache.contains("k") + assert not cache.contains("ghost") + + def test_repr_string(self): + cache = self.create_cache() + + cache.insert("k", "v") + out = repr(cache) + + assert isinstance(out, str) + assert type(cache).__name__ in out + + def test_eq_same_contents(self): + c1 = self.create_cache(maxsize=10, iterable={"a": 1}) + c2 = self.create_cache(maxsize=10, iterable={"a": 1}) + assert c1 == c2 + + def 
test_ne_different_contents(self): + c1 = self.create_cache(maxsize=10, iterable={"a": 1}) + c2 = self.create_cache(maxsize=10, iterable={"b": 2}) + assert c1 != c2 + + +class IterationMixin(BaseMixin): + def test_keys_returns_all_keys(self): + cache = self.create_cache() + + cache.update({"a": 1, "b": 2, "c": 3}) + assert set(cache.keys()) == {"a", "b", "c"} + + def test_values_returns_all_values(self): + cache = self.create_cache() + + cache.update({"a": 1, "b": 2, "c": 3}) + assert set(cache.values()) == {1, 2, 3} + + def test_items_returns_all_pairs(self): + cache = self.create_cache() + + cache.update({"a": 1, "b": 2}) + assert set(cache.items()) == {("a", 1), ("b", 2)} + + def test_iter_yields_keys(self): + cache = self.create_cache() + + cache.update({"x": 10, "y": 20}) + assert set(iter(cache)) == {"x", "y"} + + +class DrainClearShrinkMixin(BaseMixin): + def test_clear_removes_all_items(self): + cache = self.create_cache() + + cache.update({"a": 1, "b": 2}) + cache.clear() + assert len(cache) == 0 + assert cache.is_empty() + + def test_clear_with_reuse(self): + cache = self.create_cache() + + cache.update({"a": 1, "b": 2}) + cache.clear(reuse=True) + assert len(cache) == 0 + + def test_items_accessible_after_clear_and_reinsert(self): + cache = self.create_cache() + + cache.insert("a", 1) + cache.clear() + cache.insert("b", 2) + assert cache.get("b") == 2 + assert cache.get("a") is None + + def test_shrink_to_fit_does_not_lose_data(self): + cache = self.create_cache() + + cache.update({"a": 1, "b": 2, "c": 3}) + cache.shrink_to_fit() + assert cache.get("a") == 1 + assert cache.get("b") == 2 + assert cache.get("c") == 3 + + +class CopyMixin(BaseMixin): + def test_copy_has_same_items(self): + cache = self.create_cache() + + cache.update({"a": 1, "b": 2}) + c2 = cache.copy() + assert set(c2.items()) == set(cache.items()) + + def test_copy_is_independent(self): + cache = self.create_cache() + + cache.insert("a", 1) + c2 = cache.copy() + c2.insert("b", 2) + assert 
not cache.contains("b") + + def test_copy_preserves_maxsize(self): + cache = self.create_cache() + + c2 = cache.copy() + assert c2.maxsize() == cache.maxsize() + + +class GetSizeOfMixin(BaseMixin): + def test_current_size_uses_getsizeof(self): + # Each value is a list; size = len(value) + sizer = lambda k, v: len(v) # noqa: E731 + + c = self.create_cache(maxsize=10, getsizeof=sizer) + c.insert("a", [1, 2, 3]) # size 3 + c.insert("b", [1]) # size 1 + assert c.current_size() == 4 + + def test_overflow_based_on_weighted_size(self): + # maxsize=5; each entry costs its value + sizer = lambda k, v: v # noqa: E731 + + c = self.create_cache(maxsize=5, getsizeof=sizer) + c.insert("a", 3) # size now 3 + c.insert("b", 2) # size now 5 — full + + if isinstance(c, cachebox.Cache): + with pytest.raises(OverflowError): + c.insert("c", 1) # would push to 6 + + +class EdgeCasesMixin(BaseMixin): + def test_integer_keys(self): + cache = self.create_cache() + + cache.insert(1, "one") + assert cache.get(1) == "one" + + def test_tuple_keys(self): + cache = self.create_cache() + + cache.insert((1, 2), "tuple") + assert cache.get((1, 2)) == "tuple" + + def test_empty_string_key_and_value(self): + cache = self.create_cache() + + cache.insert("", "") + assert cache.get("") == "" + + def test_large_value(self): + unlimited = self.create_cache(0) + + big = "x" * 100_000 + unlimited.insert("big", big) + assert unlimited.get("big") == big + + def test_multiple_types_as_values(self): + cache = self.create_cache() + + cache.insert("int", 1) + cache.insert("list", [1, 2]) + cache.insert("dict", {"a": 1}) + assert cache.get("int") == 1 + assert cache.get("list") == [1, 2] + assert cache.get("dict") == {"a": 1} + + +@dataclasses.dataclass +class EQ: + def __init__(self, val: int) -> None: + self.val = val + + def __eq__(self, other: "EQ") -> bool: + return self.val == other.val + + def __hash__(self) -> int: + return self.val + + +@dataclasses.dataclass +class NoEQ: + def __init__(self, val: int) -> 
None: + self.val = val + + def __hash__(self) -> int: + return self.val + + +class IssuesMixin(BaseMixin): + def test_issue_5(self): + # https://github.com/awolverp/cachebox/issues/5 + + size = 1000 + cache = self.create_cache(size, capacity=size) + + for i in range(size): + cache.insert(NoEQ(val=i), i) + cache.get(NoEQ(val=i)) + + cache = self.create_cache(size, capacity=size) + + for i in range(size): + cache.insert(EQ(val=i), i) + cache.get(EQ(val=i)) + + +class FuzzyMixin(BaseMixin): + @given(key=hashable_keys, value=any_value) + def test_insert_then_get_returns_same_value(self, key, value): + c = self.create_cache(maxsize=0) + c.insert(key, value) + assert c.get(key) == value + + @given(key=hashable_keys, value=any_value) + def test_insert_new_key_returns_none(self, key, value): + c = self.create_cache(maxsize=0) + result = c.insert(key, value) + assert result is None + + @given(key=hashable_keys, v1=any_value, v2=any_value) + def test_insert_existing_key_returns_old_value(self, key, v1, v2): + c = self.create_cache(maxsize=0) + c.insert(key, v1) + old = c.insert(key, v2) + assert old == v1 + + @given(pairs=st.lists(st.tuples(hashable_keys, any_value), max_size=20)) + def test_len_never_exceeds_unique_keys(self, pairs): + c = self.create_cache(maxsize=0) + expected = {} + for k, v in pairs: + c.insert(k, v) + expected[k] = v + assert len(c) == len(expected) + + @given(key=hashable_keys, value=any_value) + def test_len_increases_by_one_on_new_key(self, key, value): + c = self.create_cache(maxsize=0) + before = len(c) + c.insert(key, value) + assert len(c) == before + 1 + + @given(key=hashable_keys, v1=any_value, v2=any_value) + def test_len_unchanged_on_overwrite(self, key, v1, v2): + c = self.create_cache(maxsize=0) + c.insert(key, v1) + before = len(c) + c.insert(key, v2) + assert len(c) == before + + @given(key=hashable_keys, value=any_value) + def test_contains_true_after_insert(self, key, value): + c = self.create_cache(maxsize=0) + c.insert(key, value) + 
assert key in c + assert c.contains(key) + + @given(key=hashable_keys, value=any_value) + def test_contains_false_after_delete(self, key, value): + c = self.create_cache(maxsize=0) + c.insert(key, value) + del c[key] + assert key not in c + + @given(key=hashable_keys, value=any_value) + def test_pop_returns_inserted_value(self, key, value): + c = self.create_cache(maxsize=0) + c.insert(key, value) + assert c.pop(key) == value + + @given(key=hashable_keys, value=any_value) + def test_pop_removes_key(self, key, value): + c = self.create_cache(maxsize=0) + c.insert(key, value) + c.pop(key) + assert key not in c + + @given( + maxsize=st.integers(min_value=1, max_value=50), + pairs=st.lists(st.tuples(hashable_keys, any_value), max_size=50), + ) + def test_current_size_plus_remaining_equals_maxsize(self, maxsize, pairs): + c = self.create_cache(maxsize=maxsize) + for k, v in pairs: + if c.is_full(): + break + c.insert(k, v) + assert c.current_size() + c.remaining_size() == maxsize + + @given(pairs=st.lists(st.tuples(hashable_keys, any_value), max_size=20)) + def test_clear_always_leaves_cache_empty(self, pairs): + c = self.create_cache(maxsize=0) + for k, v in pairs: + c.insert(k, v) + c.clear() + assert len(c) == 0 + assert c.is_empty() + + @given(pairs=st.lists(st.tuples(hashable_keys, any_value), max_size=20)) + def test_keys_values_items_are_consistent(self, pairs): + c = self.create_cache(maxsize=0) + truth = {} + for k, v in pairs: + c.insert(k, v) + truth[k] = v + + cache_items = dict(c.items()) + assert cache_items == truth + assert set(c.keys()) == set(truth.keys()) + assert sorted(str(v) for v in c.values()) == sorted( + str(v) for v in truth.values() + ) + + @given(key=hashable_keys, existing=any_value, default=any_value) + def test_setdefault_never_overwrites_existing(self, key, existing, default): + c = self.create_cache(maxsize=0) + c.insert(key, existing) + c.setdefault(key, default) + assert c.get(key) == existing + + @given(key=hashable_keys, 
default=any_value) + def test_setdefault_inserts_when_missing(self, key, default): + c = self.create_cache(maxsize=0) + c.setdefault(key, default) + assert c.get(key) == default + + @given(pairs=st.lists(st.tuples(hashable_keys, any_value), max_size=20)) + def test_copy_equals_original(self, pairs): + c = self.create_cache(maxsize=0) + for k, v in pairs: + c.insert(k, v) + assert c.copy() == c + + @given( + key=hashable_keys, value=any_value, new_key=hashable_keys, new_value=any_value + ) + def test_copy_is_independent_of_original(self, key, value, new_key, new_value): + assume(new_key != key) + c = self.create_cache(maxsize=0) + c.insert(key, value) + c2 = c.copy() + c2.insert(new_key, new_value) + assert not c.contains(new_key) From 618f9ec42d760d31d16ded5d08da3103861f09ac Mon Sep 17 00:00:00 2001 From: awolverp Date: Tue, 19 May 2026 15:15:53 +0330 Subject: [PATCH 07/60] Update cache structures Separated variables which are thread-safe, or can be atomic, to prevent using locks for a long time, and this helps us in async implementations. --- cachebox/_core.pyi | 16 ++- src/internal/onceinit.rs | 17 ++- src/internal/utils.rs | 27 +++-- src/policies/nopolicy.rs | 212 ++++++++++++++++++++------------ src/policies/traits.rs | 58 ++++++--- src/policies/wrapped.rs | 145 +++++++++++++--------- src/pyclasses/cache.rs | 253 +++++++++++++++++++++++---------------- tests/mixins.py | 18 +-- 8 files changed, 462 insertions(+), 284 deletions(-) diff --git a/cachebox/_core.pyi b/cachebox/_core.pyi index aae7911..9918ca0 100644 --- a/cachebox/_core.pyi +++ b/cachebox/_core.pyi @@ -30,9 +30,13 @@ class BaseCacheImpl(typing.Generic[KT, VT]): capacity: int = 0, getsizeof: typing.Callable[[KT, VT]] | None = None, ) -> None: ... + @property def maxsize(self) -> int: ... + @property def current_size(self) -> int: ... + @property def remaining_size(self) -> int: ... + @property def getsizeof(self) -> typing.Callable[[KT, VT]] | None: ... def capacity(self) -> int: ... 
def __len__(self) -> int: ... @@ -131,7 +135,7 @@ class Cache(BaseCacheImpl[KT, VT]): Initialize a new Cache instance. Args: - maxsize: Maximum number of elements the cache can hold. Zero means unlimited. + maxsize: Maximum number of elements the cache can hold. If zero, the limit is set to sys.maxsize internally. iterable: Initial data to populate the cache. capacity: Pre-allocate hash table capacity to minimize reallocations. Defaults to 0. getsizeof: A callable that computes the size of a key-value pair. When `None`, each @@ -144,18 +148,22 @@ class Cache(BaseCacheImpl[KT, VT]): """ ... + @property def maxsize(self) -> int: """Returns the specified `maxsize`""" ... + @property def current_size(self) -> int: """Returns the current total cumulative size consumed by all stored entries.""" ... + @property def remaining_size(self) -> int: """Returns the remaining size. Equals to `maxsize - current_size`""" ... + @property def getsizeof(self) -> typing.Callable[[KT, VT]] | None: """Returns the `getsizeof` function""" ... @@ -172,7 +180,8 @@ class Cache(BaseCacheImpl[KT, VT]): """ Returns `true` if the cache contains an entry for `key`. Equals to `key in self`. - It's recommended to use this method instead of `key in self`. + It's recommended to use this method instead of `key in self`, as it keeps code + compatible across different cache policies. """ ... @@ -192,7 +201,8 @@ class Cache(BaseCacheImpl[KT, VT]): - If the cache did have this key present, the value is updated, and the old value is returned. The key is not updated, though; - It's recommended to use this method instead of `self[key] = value`. + It's recommended to use this method instead of `self[key] = value`, as it keeps code + compatible across different cache policies. Note: raises `OverflowError` if the cache reached the maxsize limit, because this class does not have any algorithm. 
diff --git a/src/internal/onceinit.rs b/src/internal/onceinit.rs index 0445db0..0e79d44 100644 --- a/src/internal/onceinit.rs +++ b/src/internal/onceinit.rs @@ -22,7 +22,7 @@ pub struct OnceInitInner { state: atomic::AtomicU8, /// Heap-allocated storage that is uninitialized until [`set`](OnceInit::set) completes. /// Wrapped in a [`std::sync::Mutex`] so that post-init access is safe across threads. - value: cell::UnsafeCell>>, + value: cell::UnsafeCell>, } /// A thread-safe, write-once container for PyO3 `__new__` / `__init__` two-phase construction. @@ -58,7 +58,7 @@ impl OnceInit { pub fn new(val: T) -> Self { OnceInitInner { state: atomic::AtomicU8::new(INIT), - value: cell::UnsafeCell::new(mem::MaybeUninit::new(std::sync::Mutex::new(val))), + value: cell::UnsafeCell::new(mem::MaybeUninit::new(val)), } .into() } @@ -87,23 +87,20 @@ impl OnceInit { already_init_panic(); } // SAFETY: we own the RUNNING token — no other thread can write value. - unsafe { (*self.0.value.get()).write(std::sync::Mutex::new(val)) }; + unsafe { (*self.0.value.get()).write(val) }; self.0.state.store(INIT, atomic::Ordering::Release); } - /// Locks the inner [`std::sync::Mutex`] and returns a guard that dereferences to `T`. - /// - /// This is the primary read/write accessor after initialization. Multiple threads - /// may call `lock` concurrently; they will be serialized by the inner mutex. + /// Returns an immutable reference to initialized value. /// /// # Panics /// /// Panics if called before [`set`](Self::set) has completed. #[inline] - pub fn lock(&self) -> std::sync::MutexGuard<'_, T> { + pub fn get(&self) -> &T { if std::hint::likely(self.0.state.load(atomic::Ordering::Acquire) == INIT) { // SAFETY: state == INIT guarantees `value` was fully written and is valid. 
- unsafe { (*self.0.value.get()).assume_init_ref().lock().unwrap() } + unsafe { (*self.0.value.get()).assume_init_ref() } } else { not_init_panic() } @@ -124,7 +121,7 @@ impl From> for OnceInit { // SAFETY: Mutex is Send+Sync when T: Send; we uphold the init invariant ourselves. unsafe impl Send for OnceInit {} -unsafe impl Sync for OnceInit {} +unsafe impl Sync for OnceInit {} impl Drop for OnceInit { /// Drops the inner value if and only if [`set`](OnceInit::set) was called. diff --git a/src/internal/utils.rs b/src/internal/utils.rs index 46eeda6..686df02 100644 --- a/src/internal/utils.rs +++ b/src/internal/utils.rs @@ -50,10 +50,14 @@ pub unsafe fn pyobject_equal( /// Calls a Python `getsizeof(key, value) -> int` callable via raw FFI for maximum performance. /// +/// /// # Errors /// Propagates any Python exception raised by `getsizeof`, and also returns a `PyErr` if: /// - the return value is not an integer /// - `PyLong_AsSsize_t` returns `-1` with a live Python exception (overflow / type error) +/// +/// # Safety +/// Both pointers must be valid, non-null, live Python objects. #[inline(always)] #[optimize(speed)] pub unsafe fn call_getsizeof( @@ -179,23 +183,24 @@ where /// Returns the type name of a [`pyo3::ffi::PyObject`]. /// /// Returns `""` on failure. +/// +/// # Safety +/// The pointer must be valid, non-null, live Python object. 
#[inline] -pub fn get_type_name<'a>(py: pyo3::Python<'a>, obj: *mut pyo3::ffi::PyObject) -> String { +pub unsafe fn get_type_name<'a>(py: pyo3::Python<'a>, obj: *mut pyo3::ffi::PyObject) -> String { use pyo3::types::PyStringMethods; use pyo3::types::PyTypeMethods; - unsafe { - let type_ = pyo3::ffi::Py_TYPE(obj); + let type_ = pyo3::ffi::Py_TYPE(obj); - if type_.is_null() { - String::from("") - } else { - let obj = pyo3::types::PyType::from_borrowed_type_ptr(py, type_); + if type_.is_null() { + String::from("") + } else { + let obj = pyo3::types::PyType::from_borrowed_type_ptr(py, type_); - obj.fully_qualified_name() - .map(|x| x.to_string_lossy().into_owned()) - .unwrap_or_else(|_| String::from("")) - } + obj.fully_qualified_name() + .map(|x| x.to_string_lossy().into_owned()) + .unwrap_or_else(|_| String::from("")) } } diff --git a/src/policies/nopolicy.rs b/src/policies/nopolicy.rs index 9875592..b045b04 100644 --- a/src/policies/nopolicy.rs +++ b/src/policies/nopolicy.rs @@ -1,3 +1,5 @@ +use std::sync::atomic; + use crate::hashbrown; use crate::internal::alias; use crate::internal::utils; @@ -66,7 +68,7 @@ impl Handle { /// Consumes `self` and returns the pair. pub fn into_pair(self) -> (utils::PrecomputedHashObject, alias::PyObject) { - (self.key.into(), self.value) + (self.key, self.value) } /// Makes a clone of self. @@ -101,40 +103,66 @@ impl traits::HandleExt for Handle { pub struct Occupied<'a> { /// The parent storage that owns the hash table. policy: &'a mut NoPolicy, + /// The shared configuration + shared: &'a Shared, /// Raw bucket pointing to the occupied slot within the hash table. 
bucket: hashbrown::raw::Bucket, } impl traits::EntryExt for Occupied<'_> { + type Shared = Shared; type Handle = Handle; fn would_exceed(&self, extra_size: usize) -> bool { let handle = unsafe { self.bucket.as_ref() }; + let currsize = self.shared.currsize.load(atomic::Ordering::Relaxed); - self.policy - .currsize + currsize .saturating_add(extra_size) .saturating_sub(handle.size) - > self.policy.maxsize.get() + > self.shared.maxsize.get() } fn evict(&mut self) -> pyo3::PyResult { - self.policy.evict() + self.policy.evict(self.shared) } } impl traits::OccupiedExt for Occupied<'_> { fn remove(self) -> Self::Handle { let (h, _) = unsafe { self.policy.table.remove(self.bucket) }; - self.policy.currsize = self.policy.currsize.saturating_sub(h.size); - self.policy.gv.increment(); + + self.shared.currsize.store( + self.shared + .currsize + .load(atomic::Ordering::Relaxed) + .saturating_sub(h.size), + atomic::Ordering::SeqCst, + ); + self.shared.gv.increment(); + h } fn replace(self, new: Self::Handle) -> Self::Handle { - self.policy.currsize = self.policy.currsize.saturating_add(new.size); + self.shared.currsize.store( + self.shared + .currsize + .load(atomic::Ordering::Relaxed) + .saturating_add(new.size), + atomic::Ordering::SeqCst, + ); + let old = unsafe { std::mem::replace(self.bucket.as_mut(), new) }; - self.policy.currsize = self.policy.currsize.saturating_sub(old.size); + + self.shared.currsize.store( + self.shared + .currsize + .load(atomic::Ordering::Relaxed) + .saturating_sub(old.size), + atomic::Ordering::SeqCst, + ); + old } } @@ -146,26 +174,36 @@ impl traits::OccupiedExt for Occupied<'_> { pub struct Vacant<'a> { /// The parent policy that owns the hash table. policy: &'a mut NoPolicy, + /// The shared configuration + shared: &'a Shared, /// If true, means we used `.evict()` method, and empty slots are available /// in table; so we don't need to reserve a new one. 
space_available: bool, } impl traits::EntryExt for Vacant<'_> { + type Shared = Shared; type Handle = Handle; fn would_exceed(&self, extra_size: usize) -> bool { - self.policy.currsize.saturating_add(extra_size) > self.policy.maxsize.get() + let currsize = self.shared.currsize.load(atomic::Ordering::Relaxed); + currsize.saturating_add(extra_size) > self.shared.maxsize.get() } fn evict(&mut self) -> pyo3::PyResult { - self.policy.evict() + self.policy.evict(self.shared) } } impl traits::VacantExt for Vacant<'_> { fn insert(self, handle: Self::Handle) { - self.policy.currsize = self.policy.currsize.saturating_add(handle.size); + self.shared.currsize.store( + self.shared + .currsize + .load(atomic::Ordering::Relaxed) + .saturating_add(handle.size), + atomic::Ordering::SeqCst, + ); if !self.space_available { self.policy.table.reserve(1, |x| x.key.hash()); @@ -174,24 +212,16 @@ impl traits::VacantExt for Vacant<'_> { self.policy.table.insert_no_grow(handle.key.hash(), handle); } - self.policy.gv.increment(); + self.shared.gv.increment(); } } -/// A cache policy that performs **no eviction**. -/// -/// Insertions are rejected once `currsize` would exceed `maxsize`; the caller -/// must free space manually or accept the refusal. This is useful when the -/// eviction strategy is handled externally, or when a hard size cap with no -/// silent data loss is desired. -pub struct NoPolicy { - /// The raw hash table storing all live [`Handle`] entries. - table: hashbrown::raw::RawTable, - /// Hard upper bound on `currsize`. Stored as [`NonZeroUsize`](std::num::NonZeroUsize) +pub struct Shared { + /// Hard upper bound on `currsize`. Stored as [`NonZeroUsize`](std::num::NonZeroUsize) /// so the compiler can elide a zero-check branch in division/comparison hot paths. maxsize: std::num::NonZeroUsize, /// Running total of all stored handles' sizes, maintained incrementally.
- currsize: usize, + currsize: atomic::AtomicUsize, /// Monotonically incrementing counter bumped on every structural mutation /// (insert, remove, clear, shrink). Used to detect iterator invalidation. gv: utils::GenerationVersion, @@ -199,62 +229,77 @@ pub struct NoPolicy { getsizeof: utils::GetsizeofFunction, } -impl NoPolicy { - /// Creates a new [`NoPolicy`] with the given initial `capacity` (number of slots) - /// and a `maxsize` budget limit. - /// - /// The underlying hash table is pre-allocated to hold at least `capacity` entries - /// without reallocation. +impl Shared { + /// Creates a new [`Shared`]. #[inline] - pub fn new(capacity: usize, maxsize: usize, getsizeof: Option) -> Self { + pub fn new(maxsize: usize, getsizeof: Option) -> Self { Self { - table: hashbrown::raw::RawTable::with_capacity(capacity), maxsize: safe_non_zero!(maxsize), - currsize: 0, + currsize: atomic::AtomicUsize::new(0), gv: utils::GenerationVersion::default(), getsizeof: utils::GetsizeofFunction::new(getsizeof), } } +} - /// Returns a reference to the underlying raw hash table. - pub fn table(&self) -> &hashbrown::raw::RawTable { - &self.table +impl traits::SharedExt for Shared { + fn maxsize(&self) -> usize { + self.maxsize.get() } - /// Returns a snapshot of the current [`utils::GenerationVersion`]. - /// - /// Callers can compare a saved snapshot against a later call to detect - /// whether the table was mutated in the interim. - pub fn generation_version(&self) -> utils::GenerationVersion { + fn current_size(&self) -> usize { + self.currsize.load(atomic::Ordering::Relaxed) + } + + fn generation_version(&self) -> utils::GenerationVersion { self.gv.clone() } - /// Returns a reference to the size-measuring function used during insertion. - pub fn getsizeof(&self) -> &utils::GetsizeofFunction { + fn getsizeof(&self) -> &utils::GetsizeofFunction { &self.getsizeof } - /// Makes a clone of `self`.
- pub fn clone_ref(&self, py: pyo3::Python<'_>) -> Self { - let mut table = hashbrown::raw::RawTable::with_capacity(self.table.capacity()); - - unsafe { - for handle in self.table.iter().map(|x| x.as_ref()) { - table.insert_no_grow(handle.key.hash(), handle.clone_ref(py)); - } - } - + fn clone_ref(&self, py: pyo3::Python) -> Self { Self { - table, maxsize: self.maxsize, - currsize: self.currsize, - gv: utils::GenerationVersion::default(), + currsize: atomic::AtomicUsize::new(self.currsize.load(atomic::Ordering::Relaxed)), + gv: Default::default(), getsizeof: self.getsizeof.clone_ref(py), } } } +/// A cache policy that performs **no eviction**. +/// +/// Insertions are rejected once `currsize` would exceed `maxsize`; the caller +/// must free space manually or accept the refusal. This is useful when the +/// eviction strategy is handled externally, or when a hard size cap with no +/// silent data loss is desired. +pub struct NoPolicy { + /// The raw hash table storing all live [`Handle`] entries. + table: hashbrown::raw::RawTable, +} + +impl NoPolicy { + /// Creates a new [`NoPolicy`]. + /// + /// The underlying hash table is pre-allocated to hold at least `capacity` entries + /// without reallocation. + #[inline] + pub fn new(capacity: usize) -> Self { + Self { + table: hashbrown::raw::RawTable::with_capacity(capacity), + } + } + + /// Returns a reference to the underlying raw hash table. + pub fn table(&self) -> &hashbrown::raw::RawTable { + &self.table + } +} + impl traits::PolicyExt for NoPolicy { + type Shared = Shared; type Handle = Handle; type Occupied<'a> @@ -267,34 +312,27 @@ impl traits::PolicyExt for NoPolicy { where Self: 'a; - /// Returns the maximum allowed cumulative size of all stored entries. - fn maxsize(&self) -> usize { - self.maxsize.get() - } - - /// Returns the current cumulative size of all stored entries. 
- fn current_size(&self) -> usize { - self.currsize - } - fn get( &mut self, py: pyo3::Python, key: &::Key, + _shared: &Self::Shared, ) -> pyo3::PyResult> { let bucket = self.table.find(key.hash(), |x| key.py_eq(py, &x.key))?; Ok(bucket.map(|x| unsafe { x.as_ref() })) } - fn entry( - &mut self, + fn entry<'a>( + &'a mut self, py: pyo3::Python, key: &::Key, - ) -> pyo3::PyResult, Self::Vacant<'_>>> { + shared: &'a Self::Shared, + ) -> pyo3::PyResult, Self::Vacant<'a>>> { match self.table.find(key.hash(), |x| key.py_eq(py, &x.key))? { Some(bucket) => { let result = Occupied { policy: self, + shared, bucket, }; Ok(traits::PolicyEntry::Occupied(result)) @@ -302,6 +340,7 @@ impl traits::PolicyExt for NoPolicy { None => { let result = Vacant { policy: self, + shared, space_available: false, }; Ok(traits::PolicyEntry::Vacant(result)) @@ -309,32 +348,41 @@ impl traits::PolicyExt for NoPolicy { } } - fn evict(&mut self) -> pyo3::PyResult { + fn evict(&mut self, _shared: &Self::Shared) -> pyo3::PyResult { Err(new_py_error!( PyOverflowError, "The cache has no algorithm to evict items" )) } - fn shrink_to_fit(&mut self) { + fn shrink_to_fit(&mut self, shared: &Self::Shared) { let initial = self.table.capacity(); self.table.shrink_to(0, |x| x.key.hash()); if initial != self.table.capacity() { - self.gv.increment(); + shared.gv.increment(); } } - fn clear(&mut self) { + fn clear(&mut self, shared: &Self::Shared) { if self.table.is_empty() { return; } self.table.clear(); - self.gv.increment(); + shared.gv.increment(); + shared.currsize.store(0, atomic::Ordering::SeqCst); } - fn py_eq(&self, py: pyo3::Python, other: &Self) -> pyo3::PyResult { - if self.maxsize() != other.maxsize() || self.table.len() != other.table.len() { + fn py_eq( + &self, + py: pyo3::Python, + shared: &Self::Shared, + other: &Self, + other_shared: &Self::Shared, + ) -> pyo3::PyResult { + if shared.maxsize.get() != other_shared.maxsize.get() + || self.table.len() != other.table.len() + { return Ok(false); } @@ 
-376,4 +424,16 @@ impl traits::PolicyExt for NoPolicy { } Ok(result) } + + fn clone_ref(&self, py: pyo3::Python<'_>) -> Self { + let mut table = hashbrown::raw::RawTable::with_capacity(self.table.capacity()); + + unsafe { + for handle in self.table.iter().map(|x| x.as_ref()) { + table.insert_no_grow(handle.key.hash(), handle.clone_ref(py)); + } + } + + Self { table } + } } diff --git a/src/policies/traits.rs b/src/policies/traits.rs index a55fb91..1905364 100644 --- a/src/policies/traits.rs +++ b/src/policies/traits.rs @@ -1,3 +1,5 @@ +use crate::internal::utils; + pub trait HandleExt { type Key; @@ -16,6 +18,7 @@ pub trait HandleExt { /// Both variants hold a mutable borrow of the parent policy, so budget checks /// and eviction go through the entry rather than through the policy directly. pub trait EntryExt { + type Shared: SharedExt; type Handle: HandleExt; /// Returns `true` if adding `extra_size` would meet or exceed @@ -59,23 +62,37 @@ pub enum PolicyEntry { Vacant(V), } +pub trait SharedExt: Send + Sync { + /// Returns the configured maxsize. + fn maxsize(&self) -> usize; + + /// Returns the current total cumulative size consumed by all stored entries. + fn current_size(&self) -> usize; + + /// Returns the generation version. + fn generation_version(&self) -> utils::GenerationVersion; + + /// Returns a reference to the configured getsizeof function. + fn getsizeof(&self) -> &utils::GetsizeofFunction; + + /// Make a clone of `self`. + fn clone_ref(&self, py: pyo3::Python) -> Self; +} + pub trait PolicyExt { + /// Thread-safe (read-only or atomic) variables; this type is kept separate from the main + /// policy implementation because it must be accessible without holding the `Mutex`. + type Shared: SharedExt; type Handle: HandleExt; - type Occupied<'a>: OccupiedExt + 'a + type Occupied<'a>: OccupiedExt + 'a where Self: 'a; - type Vacant<'a>: VacantExt + 'a + type Vacant<'a>: VacantExt + 'a where Self: 'a; - /// Returns the configured maxsize.
- fn maxsize(&self) -> usize; - - /// Returns the current total cumulative size consumed by all stored entries. - fn current_size(&self) -> usize; - /// Looks up a handle by `hash` and `eq`, applying policy side-effects on hit. /// /// # Errors @@ -85,6 +102,7 @@ pub trait PolicyExt { &mut self, py: pyo3::Python, key: &::Key, + shared: &Self::Shared, ) -> pyo3::PyResult>; /// Returns a [`PolicyEntry`] for the slot at `hash` / `eq`. @@ -92,11 +110,12 @@ pub trait PolicyExt { /// # Errors /// /// Returns `Err` if `eq` raises a Python exception. - fn entry( - &mut self, + fn entry<'a>( + &'a mut self, py: pyo3::Python, key: &::Key, - ) -> pyo3::PyResult, Self::Vacant<'_>>>; + shared: &'a Self::Shared, + ) -> pyo3::PyResult, Self::Vacant<'a>>>; /// Evicts a handle according to the policy algorithm, returning it. /// @@ -107,14 +126,23 @@ pub trait PolicyExt { /// # Panics /// /// May panic if the policy is empty. - fn evict(&mut self) -> pyo3::PyResult; + fn evict(&mut self, shared: &Self::Shared) -> pyo3::PyResult; /// Removes all handles without shrinking the allocation. - fn clear(&mut self); + fn clear(&mut self, shared: &Self::Shared); /// Shrinks the internal allocation as close to length as possible. - fn shrink_to_fit(&mut self); + fn shrink_to_fit(&mut self, shared: &Self::Shared); /// Performs Python `==`. - fn py_eq(&self, py: pyo3::Python, other: &Self) -> pyo3::PyResult; + fn py_eq( + &self, + py: pyo3::Python, + shared: &Self::Shared, + other: &Self, + other_shared: &Self::Shared, + ) -> pyo3::PyResult; + + /// Make a clone of `self`. 
+ fn clone_ref(&self, py: pyo3::Python) -> Self; } diff --git a/src/policies/wrapped.rs b/src/policies/wrapped.rs index b443d02..bbd6e01 100644 --- a/src/policies/wrapped.rs +++ b/src/policies/wrapped.rs @@ -1,15 +1,13 @@ -use std::ops::Deref; -use std::ops::DerefMut; - use crate::internal::alias; use crate::policies::traits::EntryExt; use crate::policies::traits::HandleExt; use crate::policies::traits::OccupiedExt; use crate::policies::traits::PolicyEntry; use crate::policies::traits::PolicyExt; +use crate::policies::traits::SharedExt; use crate::policies::traits::VacantExt; -/// A transparent wrapper over [`PolicyExt`] implementations that adds +/// A wrapper over [`PolicyExt`] implementations that adds /// higher-level methods shared across all policies. /// /// - [`insert`](Wrapped::insert) @@ -17,43 +15,86 @@ use crate::policies::traits::VacantExt; /// - [`contains`](Wrapped::contains) /// - [`extend`](Wrapped::extend). /// -/// Because the wrapper is `#[repr(transparent)]` and implements [`Deref`] / [`DerefMut`], -/// all methods of the inner policy `P` are directly accessible without unwrapping. -#[repr(transparent)] -pub struct Wrapped(P); +/// The shared (lock-free) fields of the policy are accessible directly via +/// [`Wrapped::shared`], while mutable state is accessed through the inner +/// [`parking_lot::Mutex`]. +pub struct Wrapped { + /// Read-only fields after initialization — no lock required. + /// Accessible directly without acquiring the mutex. + shared: P::Shared, + /// Mutable policy state — protected by a [`parking_lot::Mutex`]. + inner: parking_lot::Mutex

, +} -impl Deref for Wrapped

{ - type Target = P; +impl Wrapped

{ + /// Wraps an existing policy alongside its shared (lock-free) data. + pub fn new(policy: P, shared: P::Shared) -> Self { + Self { + shared, + inner: parking_lot::Mutex::new(policy), + } + } - fn deref(&self) -> &Self::Target { - &self.0 + /// Returns a reference to the shared, lock-free fields of the policy. + pub fn shared(&self) -> &P::Shared { + &self.shared } -} -impl DerefMut for Wrapped

{ - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.0 + /// Acquires the mutex and returns a guard over the mutable policy state. + /// + /// # Panics + /// Panics if the mutex is poisoned. + pub fn policy(&self) -> parking_lot::MutexGuard<'_, P> { + self.inner.lock() } } -impl Wrapped

{ - /// Wraps an existing policy, granting access to the shared higher-level API. - pub fn new(policy: P) -> Self { - Self(policy) +fn insert_inner( + lock: &mut parking_lot::MutexGuard<'_, P>, + shared: &P::Shared, + py: pyo3::Python<'_>, + handle: P::Handle, +) -> pyo3::PyResult> { + let entry = lock.entry(py, handle.key(), shared)?; + match entry { + PolicyEntry::Occupied(mut occupied) => { + // Evict if need + while occupied.would_exceed(handle.size()) { + occupied.evict()?; + } + + Ok(Some(occupied.replace(handle))) + } + PolicyEntry::Vacant(mut vacant) => { + // Evict if need + while vacant.would_exceed(handle.size()) { + vacant.evict()?; + } + + vacant.insert(handle); + Ok(None) + } } +} +// Duplicate methods across all policies +impl Wrapped

{ /// Returns the remaining size. Equals to `maxsize - current_size`. pub fn remaining_size(&self) -> usize { - self.maxsize().checked_sub(self.current_size()).unwrap_or(0) + self.shared + .maxsize() + .saturating_sub(self.shared.current_size()) } /// Returns `true` if the cache contains an entry for `key`. pub fn contains( - &mut self, + &self, py: pyo3::Python<'_>, key: &::Key, ) -> pyo3::PyResult { - let handle = self.0.get(py, key)?; + let mut lock = self.inner.lock(); + + let handle = lock.get(py, key, &self.shared)?; Ok(handle.is_some()) } @@ -63,42 +104,24 @@ impl Wrapped

{ /// - If the key was already present, the old handle is replaced and returned as `Some`. /// - If the key was absent, the handle is inserted and `None` is returned. pub fn insert( - &mut self, + &self, py: pyo3::Python<'_>, handle: P::Handle, ) -> pyo3::PyResult> { - let entry = self.0.entry(py, handle.key())?; - - match entry { - PolicyEntry::Occupied(mut occupied) => { - // Evict if need - while occupied.would_exceed(handle.size()) { - occupied.evict()?; - } - - Ok(Some(occupied.replace(handle))) - } - PolicyEntry::Vacant(mut vacant) => { - // Evict if need - while vacant.would_exceed(handle.size()) { - vacant.evict()?; - } - - vacant.insert(handle); - Ok(None) - } - } + let mut lock = self.inner.lock(); + insert_inner(&mut lock, &self.shared, py, handle) } /// Removes the entry for `key` from the cache, returning its [`Handle`](PolicyExt::Handle) /// if it was present, or `None` if the key was not found. pub fn remove( - &mut self, + &self, py: pyo3::Python<'_>, key: &::Key, ) -> pyo3::PyResult> { - let entry = self.0.entry(py, key)?; + let mut lock = self.inner.lock(); + let entry = lock.entry(py, key, &self.shared)?; match entry { PolicyEntry::Occupied(occupied) => { let handle = occupied.remove(); @@ -122,17 +145,15 @@ impl Wrapped

{ /// other dict-like types; `.items()` is called and the result is iterated. /// - **Any other iterable** — iterated directly, with each element expected to /// unpack as a `(key, value)` pair. - pub fn extend( - &mut self, - iterable: alias::BoundObject, - mut transform: F, - ) -> pyo3::PyResult<()> + pub fn extend(&self, iterable: alias::BoundObject, mut transform: F) -> pyo3::PyResult<()> where F: FnMut(alias::PyObject, alias::PyObject) -> pyo3::PyResult, { use pyo3::types::PyAnyMethods; use pyo3::types::PyDictMethods; + let mut lock = self.inner.lock(); + // Using [pyo3::ffi::PyObject_TypeCheck] and [Bound::cast_unchecked] is so faster than [Bound::cast] let is_dictionary = unsafe { pyo3::ffi::PyObject_TypeCheck(iterable.as_ptr(), crate::typeref::STD_DICT_TYPE) == 1 @@ -146,7 +167,7 @@ impl Wrapped

{ .unwrap_unchecked() }; - self.insert(pair.py(), transform(key, value)?)?; + insert_inner(&mut lock, &self.shared, pair.py(), transform(key, value)?)?; } return Ok(()); @@ -166,7 +187,7 @@ impl Wrapped

{ let pair = pair?; let (key, value) = pair.extract::<(alias::PyObject, alias::PyObject)>()?; - self.insert(pair.py(), transform(key, value)?)?; + insert_inner(&mut lock, &self.shared, pair.py(), transform(key, value)?)?; } Ok(()) @@ -174,7 +195,7 @@ impl Wrapped

{ /// Calls the `evict()` `n` times and returns count of removed items. pub fn drain( - &mut self, + &self, py: pyo3::Python, n: pyo3::ffi::Py_ssize_t, ) -> pyo3::PyResult { @@ -182,9 +203,11 @@ impl Wrapped

{ return Ok(0); } + let mut lock = self.inner.lock(); + let mut count: pyo3::ffi::Py_ssize_t = 0; while count < n { - match self.0.evict() { + match lock.evict(&self.shared) { Ok(_) => {} Err(err) => { if !err.is_instance_of::(py) { @@ -200,4 +223,14 @@ impl Wrapped

{ Ok(count) } + + pub fn clone_ref(&self, py: pyo3::Python) -> Self { + let shared = self.shared.clone_ref(py); + let policy = self.inner.lock().clone_ref(py); + + Self { + shared, + inner: parking_lot::Mutex::new(policy), + } + } } diff --git a/src/pyclasses/cache.rs b/src/pyclasses/cache.rs index 15ec8bd..5caf0c5 100644 --- a/src/pyclasses/cache.rs +++ b/src/pyclasses/cache.rs @@ -4,6 +4,7 @@ use crate::internal::utils; use crate::policies::nopolicy; use crate::policies::traits::HandleExt; use crate::policies::traits::PolicyExt; +use crate::policies::traits::SharedExt; use crate::policies::wrapped::Wrapped; implement_pyclass! { @@ -85,10 +86,13 @@ impl PyCache { capacity: usize, getsizeof: Option, ) -> pyo3::PyResult<()> { - let mut wrapped = Wrapped::new(nopolicy::NoPolicy::new(capacity, maxsize, getsizeof)); + let wrapped = Wrapped::new( + nopolicy::NoPolicy::new(capacity), + nopolicy::Shared::new(maxsize, getsizeof), + ); if let Some(iterable) = iterable { - let getsizeof = wrapped.getsizeof().clone_ref(py); + let getsizeof = wrapped.shared().getsizeof().clone_ref(py); let result = wrapped.extend( // iterable object @@ -104,80 +108,85 @@ impl PyCache { } } - /// Returns the number of entries currently in the cache. - fn __len__(&self) -> usize { - let lock = self.0.lock(); - lock.table().len() - } - - fn __sizeof__(&self) -> usize { - let lock = self.0.lock(); - lock.table().capacity() * std::mem::size_of::() - } - - fn __bool__(&self) -> bool { - let lock = self.0.lock(); - lock.table().len() > 0 - } - - fn __contains__(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult { - self.contains(py, key) - } - - /// Returns the specified `maxsize` + #[getter] fn maxsize(&self) -> usize { - let lock = self.0.lock(); - lock.maxsize() + let inner = self.0.get(); + inner.shared().maxsize() } - /// Returns the current total cumulative size consumed by all stored entries. 
+ #[getter] fn current_size(&self) -> usize { - let lock = self.0.lock(); - lock.current_size() + let inner = self.0.get(); + inner.shared().current_size() } - /// Returns the remaining size. Equals to `maxsize - current_size` + #[getter] fn remaining_size(&self) -> usize { - let lock = self.0.lock(); - lock.remaining_size() + let inner = self.0.get(); + inner.remaining_size() } - /// Returns the `getsizeof` function + #[getter] fn getsizeof(&self, py: pyo3::Python) -> Option { - let lock = self.0.lock(); - lock.getsizeof().clone_ref(py).into() + let inner = self.0.get(); + inner.shared().getsizeof().clone_ref(py).into() } /// Returns the number of elements the map can hold without reallocating. fn capacity(&self) -> usize { - let lock = self.0.lock(); - lock.table().capacity() + let inner = self.0.get(); + let policy = inner.policy(); + + policy.table().capacity() } /// Returns the number of entries currently in the cache. - fn len(&self) -> usize { - let lock = self.0.lock(); - lock.table().len() + fn __len__(&self) -> usize { + let inner = self.0.get(); + let policy = inner.policy(); + + policy.table().len() + } + + fn __sizeof__(&self) -> usize { + let inner = self.0.get(); + let policy = inner.policy(); + + policy.table().capacity() * std::mem::size_of::() + } + + fn __bool__(&self) -> bool { + let inner = self.0.get(); + let policy = inner.policy(); + + !policy.table().is_empty() + } + + fn __contains__(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult { + self.contains(py, key) } /// Returns `true` if the cache contains an entry for `key`. fn contains(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult { let key = utils::PrecomputedHashObject::new(py, key)?; - - let mut lock = self.0.lock(); - lock.contains(py, &key) + let inner = self.0.get(); + inner.contains(py, &key) } /// Returns `True` if cache is empty. 
fn is_empty(&self) -> bool { - let lock = self.0.lock(); - lock.table().len() == 0 + let inner = self.0.get(); + let policy = inner.policy(); + + policy.table().len() == 0 } /// Returns `True` when the cumulative size has reached the maxsize limit. fn is_full(&self) -> bool { - let lock = self.0.lock(); - lock.current_size() >= lock.maxsize() + let inner = self.0.get(); + let shared = inner.shared(); + + shared.current_size() >= shared.maxsize() } /// Equals to `self[key] = value`, but returns a value: @@ -194,10 +203,10 @@ impl PyCache { key: alias::PyObject, value: alias::PyObject, ) -> pyo3::PyResult> { - let mut lock = self.0.lock(); - let handle = nopolicy::Handle::new(py, lock.getsizeof(), key, value)?; + let inner = self.0.get(); + let handle = nopolicy::Handle::new(py, inner.shared().getsizeof(), key, value)?; - let old_handle = lock.insert(py, handle)?.map(|x| x.into_value()); + let old_handle = inner.insert(py, handle)?.map(|x| x.into_value()); Ok(old_handle) } @@ -211,10 +220,10 @@ impl PyCache { return Ok(()); } - let mut lock = slf.0.lock(); - let getsizeof = lock.getsizeof().clone_ref(py); + let inner = slf.0.get(); + let getsizeof = inner.shared().getsizeof().clone_ref(py); - lock.extend( + inner.extend( // iterable object iterable.into_bound(py), // transform function @@ -228,10 +237,7 @@ impl PyCache { key: alias::PyObject, value: alias::PyObject, ) -> pyo3::PyResult<()> { - let mut lock = self.0.lock(); - let handle = nopolicy::Handle::new(py, lock.getsizeof(), key, value)?; - - lock.insert(py, handle)?; + self.insert(py, key, value)?; Ok(()) } @@ -255,9 +261,10 @@ impl PyCache { ) -> pyo3::PyResult { let key = utils::PrecomputedHashObject::new(py, key)?; - let mut lock = self.0.lock(); + let inner = self.0.get(); + let mut policy = inner.policy(); - if let Some(x) = lock.get(py, &key)? { + if let Some(x) = policy.get(py, &key, inner.shared())? 
{ return Ok(x.value().clone_ref(py)); } @@ -277,8 +284,10 @@ impl PyCache { ) -> pyo3::PyResult { let key = utils::PrecomputedHashObject::new(py, key)?; - let mut lock = self.0.lock(); - match lock.get(py, &key)? { + let inner = self.0.get(); + let mut policy = inner.policy(); + + match policy.get(py, &key, inner.shared())? { Some(x) => Ok(x.value().clone_ref(py)), None => Err(new_py_error!( PyKeyError, @@ -302,10 +311,14 @@ impl PyCache { // 3. Else -> insert default -> return default let key = utils::PrecomputedHashObject::new(py, key)?; - let mut lock = self.0.lock(); - if let Some(x) = lock.get(py, &key)? { + let inner = self.0.get(); + let shared = inner.shared(); + let mut policy = inner.policy(); + + if let Some(x) = policy.get(py, &key, shared)? { return Ok(x.value().clone_ref(py)); } + drop(policy); let default_object = match default { utils::OptionalArgument::Defined(x) => x.unbind(), @@ -317,12 +330,12 @@ impl PyCache { let handle = nopolicy::Handle::with_precomputed_hash_key( py, - lock.getsizeof(), + shared.getsizeof(), key, default_object.clone_ref(py), )?; - lock.insert(py, handle)?; + inner.insert(py, handle)?; Ok(default_object) } @@ -338,8 +351,9 @@ impl PyCache { ) -> pyo3::PyResult { let key = utils::PrecomputedHashObject::new(py, key)?; - let mut lock = self.0.lock(); - if let Some(x) = lock.remove(py, &key)? { + let inner = self.0.get(); + + if let Some(x) = inner.remove(py, &key)? { return Ok(x.into_value()); } @@ -355,8 +369,8 @@ impl PyCache { fn __delitem__(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult<()> { let key = utils::PrecomputedHashObject::new(py, key)?; - let mut lock = self.0.lock(); - match lock.remove(py, &key)? { + let inner = self.0.get(); + match inner.remove(py, &key)? { Some(_) => Ok(()), None => Err(new_py_error!( PyKeyError, @@ -369,9 +383,11 @@ impl PyCache { /// /// NOTE: `Cache` always raises `NotImplementedError` because has neither policy nor algorithm to evict items. 
fn popitem(&self) -> pyo3::PyResult<(alias::PyObject, alias::PyObject)> { - let mut lock = self.0.lock(); - let handle = lock.evict()?; - drop(lock); + let inner = self.0.get(); + let mut policy = inner.policy(); + + let handle = policy.evict(inner.shared())?; + drop(policy); let (key, val) = handle.into_pair(); Ok((key.into(), val)) @@ -383,23 +399,28 @@ impl PyCache { py: pyo3::Python, n: pyo3::ffi::Py_ssize_t, ) -> pyo3::PyResult { - let mut lock = self.0.lock(); - lock.drain(py, n) + let inner = self.0.get(); + inner.drain(py, n) } /// Shrinks the internal allocation as close to the current length as possible. fn shrink_to_fit(&self) { - self.0.lock().shrink_to_fit(); + let inner = self.0.get(); + let mut policy = inner.policy(); + policy.shrink_to_fit(inner.shared()); } /// Removes all entries from the table and resets the cumulative size to zero. #[pyo3(signature=(*, reuse=false))] fn clear(&self, reuse: bool) { - let mut lock = self.0.lock(); - lock.clear(); + let inner = self.0.get(); + let shared = inner.shared(); + let mut policy = inner.policy(); + + policy.clear(shared); if !reuse { - lock.shrink_to_fit(); + policy.shrink_to_fit(shared); } } @@ -412,10 +433,18 @@ impl PyCache { return Ok(true); } - let self_lock = slf.0.lock(); - let other_lock = other.0.lock(); + let self_inner = slf.0.get(); + let other_inner = other.0.get(); + + let self_policy = self_inner.policy(); + let other_policy = other_inner.policy(); - self_lock.py_eq(py, &*other_lock) + self_policy.py_eq( + py, + self_inner.shared(), + &*other_policy, + other_inner.shared(), + ) } fn __ne__( @@ -427,20 +456,30 @@ impl PyCache { return Ok(false); } - let self_lock = slf.0.lock(); - let other_lock = other.0.lock(); + let self_inner = slf.0.get(); + let other_inner = other.0.get(); + + let self_policy = self_inner.policy(); + let other_policy = other_inner.policy(); - self_lock.py_eq(py, &*other_lock).map(|x| !x) + self_policy + .py_eq( + py, + self_inner.shared(), + &*other_policy, + 
other_inner.shared(), + ) + .map(|x| !x) } fn items(&self, py: pyo3::Python) -> pyo3::PyResult> { - let lock = self.0.lock(); - let gv = lock.generation_version(); + let inner = self.0.get(); + let gv = inner.shared().generation_version(); let initial_gv = gv.get(); // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] let result = PyCacheItems { - iter: parking_lot::Mutex::new(unsafe { lock.table().iter() }), + iter: parking_lot::Mutex::new(unsafe { inner.policy().table().iter() }), gv, initial_gv, }; @@ -448,13 +487,13 @@ impl PyCache { } fn values(&self, py: pyo3::Python) -> pyo3::PyResult> { - let lock = self.0.lock(); - let gv = lock.generation_version(); + let inner = self.0.get(); + let gv = inner.shared().generation_version(); let initial_gv = gv.get(); // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] let result = PyCacheValues { - iter: parking_lot::Mutex::new(unsafe { lock.table().iter() }), + iter: parking_lot::Mutex::new(unsafe { inner.policy().table().iter() }), gv, initial_gv, }; @@ -462,13 +501,13 @@ impl PyCache { } fn keys(&self, py: pyo3::Python) -> pyo3::PyResult> { - let lock = self.0.lock(); - let gv = lock.generation_version(); + let inner = self.0.get(); + let gv = inner.shared().generation_version(); let initial_gv = gv.get(); // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] let result = PyCacheKeys { - iter: parking_lot::Mutex::new(unsafe { lock.table().iter() }), + iter: parking_lot::Mutex::new(unsafe { inner.policy().table().iter() }), gv, initial_gv, }; @@ -480,9 +519,9 @@ impl PyCache { } fn copy(&self, py: pyo3::Python) -> pyo3::PyResult> { - let lock = self.0.lock(); - let cloned = lock.clone_ref(py); - let result = Self(onceinit::OnceInit::new(Wrapped::new(cloned))); + let inner = self.0.get(); + let cloned = inner.clone_ref(py); + let result = Self(onceinit::OnceInit::new(cloned)); pyo3::Py::new(py, 
(result, crate::pyclasses::base::PyBaseCacheImpl)) } @@ -492,10 +531,13 @@ impl PyCache { } fn __repr__(slf: pyo3::PyRef<'_, Self>, py: pyo3::Python) -> String { - let lock = slf.0.lock(); + let inner = slf.0.get(); + let shared = inner.shared(); + let policy = inner.policy(); let iter = unsafe { - lock.table() + policy + .table() .iter() .map(|bucket| bucket.as_ref()) .map(|handle| { @@ -507,20 +549,21 @@ impl PyCache { }) }; - let items = utils::items_to_str(iter, lock.table().len()).unwrap(); + let items = utils::items_to_str(iter, policy.table().len()).unwrap(); format!( "{}[{}/{}]({})", - utils::get_type_name(py, slf.as_ptr()), - lock.current_size(), - lock.maxsize(), + unsafe { utils::get_type_name(py, slf.as_ptr()) }, + shared.current_size(), + shared.maxsize(), items ) } fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { - let lock = self.0.lock(); + let inner = self.0.get(); + let policy = inner.policy(); - for handle_ref in unsafe { lock.table().iter() } { + for handle_ref in unsafe { policy.table().iter() } { let handle = unsafe { handle_ref.as_ref() }; visit.call(handle.key().as_ref())?; @@ -530,7 +573,9 @@ impl PyCache { } fn __clear__(&self) { - self.0.lock().clear(); + let inner = self.0.get(); + let mut policy = inner.policy(); + policy.clear(inner.shared()); } } diff --git a/tests/mixins.py b/tests/mixins.py index 54fd280..f778d0f 100644 --- a/tests/mixins.py +++ b/tests/mixins.py @@ -45,11 +45,11 @@ def test_empty_on_creation(self): def test_maxsize_stored(self): cache = self.create_cache() - assert cache.maxsize() == 10 + assert cache.maxsize == 10 def test_maxsize_zero_means_unlimited(self): cache = self.create_cache(0) - assert cache.maxsize() == sys.maxsize + assert cache.maxsize == sys.maxsize def test_init_from_dict(self): c = self.create_cache(maxsize=10, iterable={"a": 1, "b": 2}) @@ -77,7 +77,7 @@ def test_getsizeof_stored(self): sizer = lambda k, v: len(v) # noqa: E731 c = 
self.create_cache(maxsize=100, getsizeof=sizer) - assert c.getsizeof() is sizer + assert c.getsizeof is sizer class InsertAndGetMixin(BaseMixin): @@ -242,13 +242,13 @@ def test_current_size_equals_len_without_getsizeof(self): cache.insert("a", 1) cache.insert("b", 2) - assert cache.current_size() == len(cache) + assert cache.current_size == len(cache) def test_remaining_size(self): cache = self.create_cache() cache.insert("a", 1) - assert cache.remaining_size() == cache.maxsize() - cache.current_size() + assert cache.remaining_size == cache.maxsize - cache.current_size def test_is_empty_on_new_cache(self): cache = self.create_cache() @@ -387,7 +387,7 @@ def test_copy_preserves_maxsize(self): cache = self.create_cache() c2 = cache.copy() - assert c2.maxsize() == cache.maxsize() + assert c2.maxsize == cache.maxsize class GetSizeOfMixin(BaseMixin): @@ -398,7 +398,7 @@ def test_current_size_uses_getsizeof(self): c = self.create_cache(maxsize=10, getsizeof=sizer) c.insert("a", [1, 2, 3]) # size 3 c.insert("b", [1]) # size 1 - assert c.current_size() == 4 + assert c.current_size == 4 def test_overflow_based_on_weighted_size(self): # maxsize=5; each entry costs its value @@ -455,7 +455,7 @@ class EQ: def __init__(self, val: int) -> None: self.val = val - def __eq__(self, other: "EQ") -> bool: + def __eq__(self, other: "EQ") -> bool: # type: ignore return self.val == other.val def __hash__(self) -> int: @@ -570,7 +570,7 @@ def test_current_size_plus_remaining_equals_maxsize(self, maxsize, pairs): if c.is_full(): break c.insert(k, v) - assert c.current_size() + c.remaining_size() == maxsize + assert c.current_size + c.remaining_size == maxsize @given(pairs=st.lists(st.tuples(hashable_keys, any_value), max_size=20)) def test_clear_always_leaves_cache_empty(self, pairs): From cd9a3f305ba782e6f07b0ae8bf79768989b64aba Mon Sep 17 00:00:00 2001 From: awolverp Date: Tue, 19 May 2026 15:18:08 +0330 Subject: [PATCH 08/60] Fix cargo clippy warnings --- 
src/hashbrown/control/bitmask.rs | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/hashbrown/control/bitmask.rs b/src/hashbrown/control/bitmask.rs index 7228312..8370515 100644 --- a/src/hashbrown/control/bitmask.rs +++ b/src/hashbrown/control/bitmask.rs @@ -37,11 +37,7 @@ impl BitMask { /// Returns the first set bit in the `BitMask`, if there is one. #[inline] pub(crate) fn lowest_set_bit(self) -> Option { - if let Some(nonzero) = NonZeroBitMaskWord::new(self.0) { - Some(Self::nonzero_trailing_zeros(nonzero)) - } else { - None - } + NonZeroBitMaskWord::new(self.0).map(Self::nonzero_trailing_zeros) } /// Returns the number of trailing zeroes in the `BitMask`. @@ -52,7 +48,7 @@ impl BitMask { // versions (pre-ARMv7) don't have RBIT and need to emulate it // instead. Since we only have 1 bit set in each byte on ARM, we can // use swap_bytes (REV) + leading_zeroes instead. - if cfg!(target_arch = "arm") && BITMASK_STRIDE % 8 == 0 { + if cfg!(target_arch = "arm") && BITMASK_STRIDE.is_multiple_of(8) { self.0.swap_bytes().leading_zeros() as usize / BITMASK_STRIDE } else { self.0.trailing_zeros() as usize / BITMASK_STRIDE @@ -62,7 +58,7 @@ impl BitMask { /// Same as above but takes a `NonZeroBitMaskWord`. #[inline] fn nonzero_trailing_zeros(nonzero: NonZeroBitMaskWord) -> usize { - if cfg!(target_arch = "arm") && BITMASK_STRIDE % 8 == 0 { + if cfg!(target_arch = "arm") && BITMASK_STRIDE.is_multiple_of(8) { // SAFETY: A byte-swapped non-zero value is still non-zero. 
let swapped = unsafe { NonZeroBitMaskWord::new_unchecked(nonzero.get().swap_bytes()) }; swapped.leading_zeros() as usize / BITMASK_STRIDE From de610212bf74cc0d146b2826c3e11c271b7c9edf Mon Sep 17 00:00:00 2001 From: awolverp Date: Tue, 19 May 2026 19:45:23 +0330 Subject: [PATCH 09/60] Remove optimize attributes, and add some inline attributes; Change author --- Cargo.toml | 2 +- a.py | 64 ++++++++++++++++++++++++++++++++++++++++ src/internal/utils.rs | 21 ++++++------- src/lib.rs | 1 - src/policies/nopolicy.rs | 23 ++++++++++++++- src/policies/wrapped.rs | 10 +++++++ src/pyclasses/cache.rs | 16 +++++++++- src/typeref.rs | 3 +- tests/mixins.py | 1 + 9 files changed, 126 insertions(+), 15 deletions(-) create mode 100644 a.py diff --git a/Cargo.toml b/Cargo.toml index 3e89e41..e17400d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ readme = "README.md" license = "MIT" homepage = "https://github.com/awolverp/cachebox" repository = "https://github.com/awolverp/cachebox.git" -authors = ["awolverp"] +authors = ["Ali Pooralijan "] [lib] name = "cachebox" diff --git a/a.py b/a.py new file mode 100644 index 0000000..9afbb54 --- /dev/null +++ b/a.py @@ -0,0 +1,64 @@ +import timeit + +import cachebox + +# --- Setup --- +MAXSIZE = 100_000 +N = 1_000 # operations per benchmark +REPEAT = 100 +NUMBER = 100 + + +def make_cache(n: int = N) -> cachebox.Cache: + """Create a pre-populated cache for benchmarks that need existing keys.""" + c = cachebox.Cache(maxsize=MAXSIZE, capacity=n) + for i in range(n): + c.insert(i, f"value_{i}") + return c + + +# --- Benchmark definitions --- + + +def bench_insert(): + c = cachebox.Cache(maxsize=MAXSIZE, capacity=N) + for i in range(N): + c.insert(i, f"value_{i}") + + +def bench_get(): + c = make_cache() + for i in range(N): + c.get(i) + + +def bench_update(): + c = make_cache() + for i in range(N): + c.insert(i, f"new_value_{i}") # insert on existing key = update + + +def bench_delete(): + c = make_cache() + for i in range(N): + del c[i] 
+ + +# --- Runner --- + +benchmarks = { + "insert": bench_insert, + "get": bench_get, + "update": bench_update, + "delete": bench_delete, +} + +print(f"Benchmark: {N} ops each, best of {REPEAT}x{NUMBER} runs\n") +print(f"{'Operation':<10} {'Best (ms)':>10} {'Per-op (µs)':>12}") +print("-" * 35) + +for name, fn in benchmarks.items(): + times = timeit.repeat(fn, repeat=REPEAT, number=NUMBER) + best_ms = min(times) / NUMBER * 1000 # best total run in ms + per_op_us = min(times) / NUMBER / N * 1_000_000 # per single op in µs + print(f"{name:<10} {best_ms:>10.3f} {per_op_us:>12.4f}") diff --git a/src/internal/utils.rs b/src/internal/utils.rs index 686df02..b24da65 100644 --- a/src/internal/utils.rs +++ b/src/internal/utils.rs @@ -9,8 +9,7 @@ use crate::internal::alias; /// /// # Safety /// Pointer must be valid, non-null, live Python objects. -#[inline(always)] -#[optimize(speed)] +#[inline] pub unsafe fn pyobject_hash( py: pyo3::Python<'_>, arg1: *mut pyo3::ffi::PyObject, @@ -28,8 +27,7 @@ pub unsafe fn pyobject_hash( /// /// # Safety /// Both pointers must be valid, non-null, live Python objects. -#[inline(always)] -#[optimize(speed)] +#[inline] pub unsafe fn pyobject_equal( py: pyo3::Python<'_>, arg1: *mut pyo3::ffi::PyObject, @@ -58,8 +56,7 @@ pub unsafe fn pyobject_equal( /// /// # Safety /// Both pointers must be valid, non-null, live Python objects. -#[inline(always)] -#[optimize(speed)] +#[inline] pub unsafe fn call_getsizeof( py: pyo3::Python<'_>, getsizeof: Option<&alias::PyObject>, @@ -115,6 +112,7 @@ pub unsafe fn call_getsizeof( /// Formats an iterator of key-value pairs into a string representation. /// /// Very useful for implementing `__repr__` methods. +#[inline(never)] pub fn items_to_str(items: I, length: usize) -> Result where K: std::fmt::Debug, @@ -186,7 +184,7 @@ where /// /// # Safety /// The pointer must be valid, non-null, live Python object. 
-#[inline] +#[inline(never)] pub unsafe fn get_type_name<'a>(py: pyo3::Python<'a>, obj: *mut pyo3::ffi::PyObject) -> String { use pyo3::types::PyStringMethods; use pyo3::types::PyTypeMethods; @@ -240,12 +238,12 @@ impl<'a, 'py> pyo3::FromPyObject<'a, 'py> for OptionalArgument<'py> { pub struct GenerationVersion(Arc); impl GenerationVersion { - #[inline] + #[inline(always)] pub fn increment(&self) -> u32 { self.0.fetch_add(1, atomic::Ordering::SeqCst) } - #[inline] + #[inline(always)] pub fn get(&self) -> u32 { self.0.load(atomic::Ordering::Relaxed) } @@ -275,11 +273,13 @@ impl PrecomputedHashObject { Ok(Self::with_precomputed_hash(object, hash)) } + #[inline] pub fn hash(&self) -> u64 { self.hash } /// Pointer-equality fast path, then Python `==`. + #[inline(always)] pub fn py_eq(&self, py: pyo3::Python<'_>, other: &Self) -> pyo3::PyResult { unsafe { pyobject_equal(py, self.object.as_ptr(), other.object.as_ptr()) } } @@ -297,6 +297,7 @@ impl PrecomputedHashObject { impl AsRef for PrecomputedHashObject { /// Returns a reference to its pyobject + #[inline] fn as_ref(&self) -> &alias::PyObject { &self.object } @@ -309,7 +310,6 @@ impl From for alias::PyObject { } } -#[derive(Debug)] #[repr(transparent)] pub struct GetsizeofFunction(Option); @@ -327,6 +327,7 @@ impl GetsizeofFunction { } /// Calls the wrapped function to get size of the pair key-value. + #[inline] pub fn call( &self, py: pyo3::Python<'_>, diff --git a/src/lib.rs b/src/lib.rs index 2fd4769..2f45905 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,6 @@ #![feature(allocator_api)] #![feature(dropck_eyepatch)] #![feature(likely_unlikely)] -#![feature(optimize_attribute)] #[macro_use] mod macro_rules; diff --git a/src/policies/nopolicy.rs b/src/policies/nopolicy.rs index b045b04..f5130e6 100644 --- a/src/policies/nopolicy.rs +++ b/src/policies/nopolicy.rs @@ -23,6 +23,7 @@ pub struct Handle { impl Handle { /// Creates a new [`Handle`], which calculates the precomputed hash itself. 
+ #[inline] pub fn new( py: pyo3::Python<'_>, getsizeof: &utils::GetsizeofFunction, @@ -41,6 +42,7 @@ impl Handle { /// /// Prefer this over [`Handle::new`] when the caller has already paid the cost /// of computing the hash (e.g. during a table lookup that preceded insertion). + #[inline] pub fn with_precomputed_hash_key( py: pyo3::Python<'_>, getsizeof: &utils::GetsizeofFunction, @@ -52,21 +54,25 @@ impl Handle { } /// Consumes `self` and returns the [`utils::PrecomputedHashObject`]. + #[inline] pub fn into_key(self) -> utils::PrecomputedHashObject { self.key } /// Returns a reference to the value. + #[inline] pub fn value(&self) -> &alias::PyObject { &self.value } /// Consumes `self` and returns the value of the pair. + #[inline] pub fn into_value(self) -> alias::PyObject { self.value } /// Consumes `self` and returns the pair. + #[inline] pub fn into_pair(self) -> (utils::PrecomputedHashObject, alias::PyObject) { (self.key, self.value) } @@ -74,6 +80,7 @@ impl Handle { /// Makes a clone of self. /// /// This creates another pointer to the same object, increasing its reference count. 
+ #[inline] pub fn clone_ref(&self, py: pyo3::Python<'_>) -> Self { Self { key: self.key.clone_ref(py), @@ -86,10 +93,12 @@ impl Handle { impl traits::HandleExt for Handle { type Key = utils::PrecomputedHashObject; + #[inline(always)] fn key(&self) -> &utils::PrecomputedHashObject { &self.key } + #[inline(always)] fn size(&self) -> usize { self.size } @@ -113,6 +122,7 @@ impl traits::EntryExt for Occupied<'_> { type Shared = Shared; type Handle = Handle; + #[inline] fn would_exceed(&self, extra_size: usize) -> bool { let handle = unsafe { self.bucket.as_ref() }; let currsize = self.shared.currsize.load(atomic::Ordering::Relaxed); @@ -123,6 +133,7 @@ impl traits::EntryExt for Occupied<'_> { > self.shared.maxsize.get() } + #[inline(always)] fn evict(&mut self) -> pyo3::PyResult { self.policy.evict(self.shared) } @@ -185,11 +196,13 @@ impl traits::EntryExt for Vacant<'_> { type Shared = Shared; type Handle = Handle; + #[inline] fn would_exceed(&self, extra_size: usize) -> bool { let currsize = self.shared.currsize.load(atomic::Ordering::Relaxed); currsize.saturating_add(extra_size) > self.shared.maxsize.get() } + #[inline(always)] fn evict(&mut self) -> pyo3::PyResult { self.policy.evict(self.shared) } @@ -243,18 +256,22 @@ impl Shared { } impl traits::SharedExt for Shared { + #[inline] fn maxsize(&self) -> usize { self.maxsize.get() } + #[inline] fn current_size(&self) -> usize { self.currsize.load(atomic::Ordering::Relaxed) } + #[inline] fn generation_version(&self) -> utils::GenerationVersion { self.gv.clone() } + #[inline] fn getsizeof(&self) -> &utils::GetsizeofFunction { &self.getsizeof } @@ -285,7 +302,6 @@ impl NoPolicy { /// /// The underlying hash table is pre-allocated to hold at least `capacity` entries /// without reallocation. - #[inline] pub fn new(capacity: usize) -> Self { Self { table: hashbrown::raw::RawTable::with_capacity(capacity), @@ -293,6 +309,7 @@ impl NoPolicy { } /// Returns a reference to the underlying raw hash table. 
+ #[inline(always)] pub fn table(&self) -> &hashbrown::raw::RawTable { &self.table } @@ -312,6 +329,7 @@ impl traits::PolicyExt for NoPolicy { where Self: 'a; + #[inline] fn get( &mut self, py: pyo3::Python, @@ -348,6 +366,7 @@ impl traits::PolicyExt for NoPolicy { } } + #[inline] fn evict(&mut self, _shared: &Self::Shared) -> pyo3::PyResult { Err(new_py_error!( PyOverflowError, @@ -355,6 +374,7 @@ impl traits::PolicyExt for NoPolicy { )) } + #[inline] fn shrink_to_fit(&mut self, shared: &Self::Shared) { let initial = self.table.capacity(); self.table.shrink_to(0, |x| x.key.hash()); @@ -364,6 +384,7 @@ impl traits::PolicyExt for NoPolicy { } } + #[inline] fn clear(&mut self, shared: &Self::Shared) { if self.table.is_empty() { return; diff --git a/src/policies/wrapped.rs b/src/policies/wrapped.rs index bbd6e01..6b86604 100644 --- a/src/policies/wrapped.rs +++ b/src/policies/wrapped.rs @@ -36,6 +36,7 @@ impl Wrapped

{ } /// Returns a reference to the shared, lock-free fields of the policy. + #[inline(always)] pub fn shared(&self) -> &P::Shared { &self.shared } @@ -44,11 +45,13 @@ impl Wrapped

{ /// /// # Panics /// Panics if the mutex is poisoned. + #[inline(always)] pub fn policy(&self) -> parking_lot::MutexGuard<'_, P> { self.inner.lock() } } +#[inline(always)] fn insert_inner( lock: &mut parking_lot::MutexGuard<'_, P>, shared: &P::Shared, @@ -80,6 +83,7 @@ fn insert_inner( // Duplicate methods across all policies impl Wrapped

{ /// Returns the remaining size. Equals to `maxsize - current_size`. + #[inline] pub fn remaining_size(&self) -> usize { self.shared .maxsize() @@ -87,6 +91,7 @@ impl Wrapped

{ } /// Returns `true` if the cache contains an entry for `key`. + #[inline] pub fn contains( &self, py: pyo3::Python<'_>, @@ -103,6 +108,7 @@ impl Wrapped

{ /// /// - If the key was already present, the old handle is replaced and returned as `Some`. /// - If the key was absent, the handle is inserted and `None` is returned. + #[inline] pub fn insert( &self, py: pyo3::Python<'_>, @@ -114,6 +120,7 @@ impl Wrapped

{ /// Removes the entry for `key` from the cache, returning its [`Handle`](PolicyExt::Handle) /// if it was present, or `None` if the key was not found. + #[inline] pub fn remove( &self, py: pyo3::Python<'_>, @@ -145,6 +152,7 @@ impl Wrapped

{ /// other dict-like types; `.items()` is called and the result is iterated. /// - **Any other iterable** — iterated directly, with each element expected to /// unpack as a `(key, value)` pair. + #[inline] pub fn extend(&self, iterable: alias::BoundObject, mut transform: F) -> pyo3::PyResult<()> where F: FnMut(alias::PyObject, alias::PyObject) -> pyo3::PyResult, @@ -194,6 +202,7 @@ impl Wrapped

{ } /// Calls the `evict()` `n` times and returns count of removed items. + #[inline] pub fn drain( &self, py: pyo3::Python, @@ -224,6 +233,7 @@ impl Wrapped

{ Ok(count) } + #[inline] pub fn clone_ref(&self, py: pyo3::Python) -> Self { let shared = self.shared.clone_ref(py); let policy = self.inner.lock().clone_ref(py); diff --git a/src/pyclasses/cache.rs b/src/pyclasses/cache.rs index 5caf0c5..cd56947 100644 --- a/src/pyclasses/cache.rs +++ b/src/pyclasses/cache.rs @@ -133,6 +133,7 @@ impl PyCache { } /// Returns the number of elements the map can hold without reallocating. + #[inline] fn capacity(&self) -> usize { let inner = self.0.get(); let policy = inner.policy(); @@ -141,6 +142,7 @@ impl PyCache { } /// Returns the number of entries currently in the cache. + #[inline] fn __len__(&self) -> usize { let inner = self.0.get(); let policy = inner.policy(); @@ -148,6 +150,7 @@ impl PyCache { policy.table().len() } + #[inline] fn __sizeof__(&self) -> usize { let inner = self.0.get(); let policy = inner.policy(); @@ -155,6 +158,7 @@ impl PyCache { policy.table().capacity() * std::mem::size_of::() } + #[inline] fn __bool__(&self) -> bool { let inner = self.0.get(); let policy = inner.policy(); @@ -162,11 +166,13 @@ impl PyCache { !policy.table().is_empty() } + #[inline] fn __contains__(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult { self.contains(py, key) } /// Returns `true` if the cache contains an entry for `key`. + #[inline] fn contains(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult { let key = utils::PrecomputedHashObject::new(py, key)?; let inner = self.0.get(); @@ -174,14 +180,16 @@ impl PyCache { } /// Returns `True` if cache is empty. + #[inline] fn is_empty(&self) -> bool { let inner = self.0.get(); let policy = inner.policy(); - policy.table().len() == 0 + policy.table().is_empty() } /// Returns `True` when the cumulative size has reached the maxsize limit. 
+ #[inline] fn is_full(&self) -> bool { let inner = self.0.get(); let shared = inner.shared(); @@ -231,6 +239,7 @@ impl PyCache { ) } + #[inline] fn __setitem__( &self, py: pyo3::Python, @@ -394,6 +403,7 @@ impl PyCache { } /// Calls the `popitem()` `n` times and returns count of removed items. + #[inline] fn drain( &self, py: pyo3::Python, @@ -404,6 +414,7 @@ impl PyCache { } /// Shrinks the internal allocation as close to the current length as possible. + #[inline] fn shrink_to_fit(&self) { let inner = self.0.get(); let mut policy = inner.policy(); @@ -514,6 +525,7 @@ impl PyCache { pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) } + #[inline] fn __iter__(&self, py: pyo3::Python) -> pyo3::PyResult> { self.keys(py) } @@ -526,6 +538,7 @@ impl PyCache { pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseCacheImpl)) } + #[inline] fn __copy__(&self, py: pyo3::Python) -> pyo3::PyResult> { self.copy(py) } @@ -599,6 +612,7 @@ macro_rules! implement_iterator { #[pyo3::pymethods] impl $name { + #[inline] fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { slf } diff --git a/src/typeref.rs b/src/typeref.rs index 0808aa2..af8168c 100644 --- a/src/typeref.rs +++ b/src/typeref.rs @@ -1,6 +1,7 @@ pub static mut STD_DICT_TYPE: *mut pyo3::ffi::PyTypeObject = std::ptr::null_mut(); pub static mut STD_TUPLE_TYPE: *mut pyo3::ffi::PyTypeObject = std::ptr::null_mut(); +#[inline(never)] unsafe fn get_type_object_for( py: pyo3::Python, ) -> *mut pyo3::ffi::PyTypeObject { @@ -8,7 +9,7 @@ unsafe fn get_type_object_for( } #[cold] -#[optimize(size)] +#[inline(never)] fn _initialize_typeref(py: pyo3::Python) { unsafe { STD_DICT_TYPE = get_type_object_for::(py); diff --git a/tests/mixins.py b/tests/mixins.py index f778d0f..e6b4033 100644 --- a/tests/mixins.py +++ b/tests/mixins.py @@ -340,6 +340,7 @@ def test_clear_removes_all_items(self): cache.clear() assert len(cache) == 0 assert cache.is_empty() + assert cache.current_size == 0 def 
test_clear_with_reuse(self): cache = self.create_cache() From 31212487067ad855a145c936da64a00208ec89d0 Mon Sep 17 00:00:00 2001 From: awolverp Date: Tue, 19 May 2026 19:57:51 +0330 Subject: [PATCH 10/60] Optimize performance --- cachebox/_core.pyi | 14 +++------ src/policies/nopolicy.rs | 68 +++++++++++++--------------------------- src/policies/traits.rs | 6 ++-- src/policies/wrapped.rs | 5 ++- src/pyclasses/cache.rs | 9 +++--- 5 files changed, 36 insertions(+), 66 deletions(-) diff --git a/cachebox/_core.pyi b/cachebox/_core.pyi index 9918ca0..b105552 100644 --- a/cachebox/_core.pyi +++ b/cachebox/_core.pyi @@ -33,11 +33,9 @@ class BaseCacheImpl(typing.Generic[KT, VT]): @property def maxsize(self) -> int: ... @property + def getsizeof(self) -> typing.Callable[[KT, VT]] | None: ... def current_size(self) -> int: ... - @property def remaining_size(self) -> int: ... - @property - def getsizeof(self) -> typing.Callable[[KT, VT]] | None: ... def capacity(self) -> int: ... def __len__(self) -> int: ... def __sizeof__(self) -> int: ... @@ -154,20 +152,18 @@ class Cache(BaseCacheImpl[KT, VT]): ... @property + def getsizeof(self) -> typing.Callable[[KT, VT]] | None: + """Returns the `getsizeof` function""" + ... + def current_size(self) -> int: """Returns the current total cumulative size consumed by all stored entries.""" ... - @property def remaining_size(self) -> int: """Returns the remaining size. Equals to `maxsize - current_size`""" ... - @property - def getsizeof(self) -> typing.Callable[[KT, VT]] | None: - """Returns the `getsizeof` function""" - ... - def capacity(self) -> int: """Returns the number of elements the map can hold without reallocating.""" ... 
diff --git a/src/policies/nopolicy.rs b/src/policies/nopolicy.rs index f5130e6..ea6a2e3 100644 --- a/src/policies/nopolicy.rs +++ b/src/policies/nopolicy.rs @@ -1,4 +1,4 @@ -use std::sync::atomic; +// use std::sync::atomic; use crate::hashbrown; use crate::internal::alias; @@ -125,9 +125,9 @@ impl traits::EntryExt for Occupied<'_> { #[inline] fn would_exceed(&self, extra_size: usize) -> bool { let handle = unsafe { self.bucket.as_ref() }; - let currsize = self.shared.currsize.load(atomic::Ordering::Relaxed); - currsize + self.policy + .currsize .saturating_add(extra_size) .saturating_sub(handle.size) > self.shared.maxsize.get() @@ -143,36 +143,16 @@ impl traits::OccupiedExt for Occupied<'_> { fn remove(self) -> Self::Handle { let (h, _) = unsafe { self.policy.table.remove(self.bucket) }; - self.shared.currsize.store( - self.shared - .currsize - .load(atomic::Ordering::Relaxed) - .saturating_sub(h.size), - atomic::Ordering::SeqCst, - ); + self.policy.currsize = self.policy.currsize.saturating_sub(h.size); self.shared.gv.increment(); h } fn replace(self, new: Self::Handle) -> Self::Handle { - self.shared.currsize.store( - self.shared - .currsize - .load(atomic::Ordering::Relaxed) - .saturating_add(new.size), - atomic::Ordering::SeqCst, - ); - + self.policy.currsize = self.policy.currsize.saturating_add(new.size); let old = unsafe { std::mem::replace(self.bucket.as_mut(), new) }; - - self.shared.currsize.store( - self.shared - .currsize - .load(atomic::Ordering::Relaxed) - .saturating_sub(old.size), - atomic::Ordering::SeqCst, - ); + self.policy.currsize = self.policy.currsize.saturating_sub(old.size); old } @@ -198,8 +178,7 @@ impl traits::EntryExt for Vacant<'_> { #[inline] fn would_exceed(&self, extra_size: usize) -> bool { - let currsize = self.shared.currsize.load(atomic::Ordering::Relaxed); - currsize.saturating_add(extra_size) > self.shared.maxsize.get() + self.policy.currsize.saturating_add(extra_size) > self.shared.maxsize.get() } #[inline(always)] @@ -210,13 
+189,7 @@ impl traits::EntryExt for Vacant<'_> { impl traits::VacantExt for Vacant<'_> { fn insert(self, handle: Self::Handle) { - self.shared.currsize.store( - self.shared - .currsize - .load(atomic::Ordering::Relaxed) - .saturating_add(handle.size), - atomic::Ordering::SeqCst, - ); + self.policy.currsize = self.policy.currsize.saturating_add(handle.size); if !self.space_available { self.policy.table.reserve(1, |x| x.key.hash()); @@ -233,8 +206,6 @@ pub struct Shared { // Hard upper bound on `currsize`. Stored as [`NonZeroUsize`](std::num::NonZeroUsize) /// so the compiler can elide a zero-check branch in division/comparison hot paths. maxsize: std::num::NonZeroUsize, - /// Running total of all stored handles' sizes, maintained incrementally. - currsize: atomic::AtomicUsize, /// Monotonically incrementing counter bumped on every structural mutation /// (insert, remove, clear, shrink). Used to detect iterator invalidation. gv: utils::GenerationVersion, @@ -248,7 +219,7 @@ impl Shared { pub fn new(maxsize: usize, getsizeof: Option) -> Self { Self { maxsize: safe_non_zero!(maxsize), - currsize: atomic::AtomicUsize::new(0), + // currsize: atomic::AtomicUsize::new(0), gv: utils::GenerationVersion::default(), getsizeof: utils::GetsizeofFunction::new(getsizeof), } @@ -261,11 +232,6 @@ impl traits::SharedExt for Shared { self.maxsize.get() } - #[inline] - fn current_size(&self) -> usize { - self.currsize.load(atomic::Ordering::Relaxed) - } - #[inline] fn generation_version(&self) -> utils::GenerationVersion { self.gv.clone() @@ -279,7 +245,6 @@ impl traits::SharedExt for Shared { fn clone_ref(&self, py: pyo3::Python) -> Self { Self { maxsize: self.maxsize, - currsize: atomic::AtomicUsize::new(self.currsize.load(atomic::Ordering::Relaxed)), gv: Default::default(), getsizeof: self.getsizeof.clone_ref(py), } @@ -295,6 +260,8 @@ impl traits::SharedExt for Shared { pub struct NoPolicy { /// The raw hash table storing all live [`Handle`] entries. 
table: hashbrown::raw::RawTable, + /// Running total of all stored handles' sizes, maintained incrementally. + currsize: usize, } impl NoPolicy { @@ -305,6 +272,7 @@ impl NoPolicy { pub fn new(capacity: usize) -> Self { Self { table: hashbrown::raw::RawTable::with_capacity(capacity), + currsize: 0, } } @@ -329,6 +297,11 @@ impl traits::PolicyExt for NoPolicy { where Self: 'a; + #[inline] + fn current_size(&self) -> usize { + self.currsize + } + #[inline] fn get( &mut self, @@ -391,7 +364,7 @@ impl traits::PolicyExt for NoPolicy { } self.table.clear(); shared.gv.increment(); - shared.currsize.store(0, atomic::Ordering::SeqCst); + self.currsize = 0; } fn py_eq( @@ -455,6 +428,9 @@ impl traits::PolicyExt for NoPolicy { } } - Self { table } + Self { + table, + currsize: self.currsize, + } } } diff --git a/src/policies/traits.rs b/src/policies/traits.rs index 1905364..917789b 100644 --- a/src/policies/traits.rs +++ b/src/policies/traits.rs @@ -66,9 +66,6 @@ pub trait SharedExt: Send + Sync { /// Returns the configured maxsize. fn maxsize(&self) -> usize; - /// Returns the current total cumulative size consumed by all stored entries. - fn current_size(&self) -> usize; - /// Returns the generation version. fn generation_version(&self) -> utils::GenerationVersion; @@ -93,6 +90,9 @@ pub trait PolicyExt { where Self: 'a; + /// Returns the current total cumulative size consumed by all stored entries. + fn current_size(&self) -> usize; + /// Looks up a handle by `hash` and `eq`, applying policy side-effects on hit. /// /// # Errors diff --git a/src/policies/wrapped.rs b/src/policies/wrapped.rs index 6b86604..96f4fdc 100644 --- a/src/policies/wrapped.rs +++ b/src/policies/wrapped.rs @@ -85,9 +85,8 @@ impl Wrapped

{ /// Returns the remaining size. Equals to `maxsize - current_size`. #[inline] pub fn remaining_size(&self) -> usize { - self.shared - .maxsize() - .saturating_sub(self.shared.current_size()) + let policy = self.inner.lock(); + self.shared.maxsize().saturating_sub(policy.current_size()) } /// Returns `true` if the cache contains an entry for `key`. diff --git a/src/pyclasses/cache.rs b/src/pyclasses/cache.rs index cd56947..1516345 100644 --- a/src/pyclasses/cache.rs +++ b/src/pyclasses/cache.rs @@ -114,13 +114,11 @@ impl PyCache { inner.shared().maxsize() } - #[getter] fn current_size(&self) -> usize { let inner = self.0.get(); - inner.shared().current_size() + inner.policy().current_size() } - #[getter] fn remaining_size(&self) -> usize { let inner = self.0.get(); inner.remaining_size() @@ -193,8 +191,9 @@ impl PyCache { fn is_full(&self) -> bool { let inner = self.0.get(); let shared = inner.shared(); + let policy = inner.policy(); - shared.current_size() >= shared.maxsize() + policy.current_size() >= shared.maxsize() } /// Equals to `self[key] = value`, but returns a value: @@ -566,7 +565,7 @@ impl PyCache { format!( "{}[{}/{}]({})", unsafe { utils::get_type_name(py, slf.as_ptr()) }, - shared.current_size(), + policy.current_size(), shared.maxsize(), items ) From 59d423a5c03503f7d8daa0eef9f0db2700f0922c Mon Sep 17 00:00:00 2001 From: awolverp Date: Thu, 21 May 2026 16:05:43 +0330 Subject: [PATCH 11/60] Refactor and test a big part of FIFOCache implementation ( Commit 1/2 ) --- .gitignore | 1 + Cargo.lock | 32 -- Cargo.toml | 5 +- Makefile | 4 + a.py | 64 ---- cachebox/__init__.py | 8 + cachebox/_core.pyi | 298 +++++++++++------- src/internal/utils.rs | 3 +- src/lib.rs | 16 + src/policies/common.rs | 146 +++++++++ src/policies/fifopolicy.rs | 427 +++++++++++++++++++++++++ src/policies/mod.rs | 2 + src/policies/nopolicy.rs | 223 ++------------ src/policies/traits.rs | 12 +- src/policies/wrapped.rs | 8 +- src/pyclasses/cache.rs | 38 ++- src/pyclasses/fifocache.rs 
| 615 +++++++++++++++++++++++++++++++++++++ src/pyclasses/mod.rs | 1 + tests/conftest.py | 9 + tests/fifocache.py | 290 +++++++++++++++++ tests/mixins.py | 55 ++-- 21 files changed, 1801 insertions(+), 456 deletions(-) delete mode 100644 a.py create mode 100644 src/policies/common.rs create mode 100644 src/policies/fifopolicy.rs create mode 100644 src/pyclasses/fifocache.rs create mode 100644 tests/conftest.py create mode 100644 tests/fifocache.py diff --git a/.gitignore b/.gitignore index 61f52f5..6535ef0 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ __pycache__ /.pytest_cache /htmlcov /backup +/a.py diff --git a/Cargo.lock b/Cargo.lock index 3ffed3e..721fdf3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -16,7 +16,6 @@ dependencies = [ "parking_lot", "pyo3", "pyo3-build-config", - "tokio", ] [[package]] @@ -47,15 +46,6 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" -[[package]] -name = "inventory" -version = "0.3.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4f0c30c76f2f4ccee3fe55a2435f691ca00c0e4bd87abe4f4a851b1d4dac39b" -dependencies = [ - "rustversion", -] - [[package]] name = "libc" version = "0.2.186" @@ -100,12 +90,6 @@ dependencies = [ "windows-link", ] -[[package]] -name = "pin-project-lite" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" - [[package]] name = "portable-atomic" version = "1.13.1" @@ -127,7 +111,6 @@ version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "91fd8e38a3b50ed1167fb981cd6fd60147e091784c427b8f7183a7ee32c31c12" dependencies = [ - "inventory", "libc", "once_cell", "portable-atomic", @@ -208,12 +191,6 @@ dependencies = [ "bitflags", ] -[[package]] -name = "rustversion" -version = "1.0.22" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" - [[package]] name = "scopeguard" version = "1.2.0" @@ -249,15 +226,6 @@ version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca" -[[package]] -name = "tokio" -version = "1.52.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fc7f01b389ac15039e4dc9531aa973a135d7a4135281b12d7c1bc79fd57fffe" -dependencies = [ - "pin-project-lite", -] - [[package]] name = "unicode-ident" version = "1.0.24" diff --git a/Cargo.toml b/Cargo.toml index e17400d..484fd95 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,8 +24,8 @@ strip = true [dependencies] cfg-if = "1.0.4" parking_lot = {version="0.12.5", default-features=false} -pyo3 = {version="0.28.3", default-features=false, features=["macros", "generate-import-lib", "multiple-pymethods"]} -tokio = {version="1.52.3", default-features=false, features=["sync"]} +pyo3 = {version="0.28.3", default-features=false, features=["macros", "generate-import-lib"]} +# tokio = {version="1.52.3", default-features=false, features=["sync"]} [build-dependencies] pyo3-build-config = {version="0.28.3", default-features=false, features=["resolve-config"]} @@ -34,6 +34,7 @@ pyo3-build-config = {version="0.28.3", default-features=false, features=["resolv default = ["inline-more", "extension-module"] inline-more = [] extension-module = ["pyo3/extension-module"] +fifocache-small-offset = [] [lints.clippy] dbg_macro = "warn" diff --git a/Makefile b/Makefile index e0412ac..431394b 100644 --- a/Makefile +++ b/Makefile @@ -7,6 +7,10 @@ help: @echo -e "\tformat format rust and python code" @echo -e "\tclean clean all the unneeded files" +.PHONY: build-test +build-test: + maturin develop --features "fifocache-small-offset" + .PHONY: build-dev build-dev: maturin develop diff --git a/a.py b/a.py 
deleted file mode 100644 index 9afbb54..0000000 --- a/a.py +++ /dev/null @@ -1,64 +0,0 @@ -import timeit - -import cachebox - -# --- Setup --- -MAXSIZE = 100_000 -N = 1_000 # operations per benchmark -REPEAT = 100 -NUMBER = 100 - - -def make_cache(n: int = N) -> cachebox.Cache: - """Create a pre-populated cache for benchmarks that need existing keys.""" - c = cachebox.Cache(maxsize=MAXSIZE, capacity=n) - for i in range(n): - c.insert(i, f"value_{i}") - return c - - -# --- Benchmark definitions --- - - -def bench_insert(): - c = cachebox.Cache(maxsize=MAXSIZE, capacity=N) - for i in range(N): - c.insert(i, f"value_{i}") - - -def bench_get(): - c = make_cache() - for i in range(N): - c.get(i) - - -def bench_update(): - c = make_cache() - for i in range(N): - c.insert(i, f"new_value_{i}") # insert on existing key = update - - -def bench_delete(): - c = make_cache() - for i in range(N): - del c[i] - - -# --- Runner --- - -benchmarks = { - "insert": bench_insert, - "get": bench_get, - "update": bench_update, - "delete": bench_delete, -} - -print(f"Benchmark: {N} ops each, best of {REPEAT}x{NUMBER} runs\n") -print(f"{'Operation':<10} {'Best (ms)':>10} {'Per-op (µs)':>12}") -print("-" * 35) - -for name, fn in benchmarks.items(): - times = timeit.repeat(fn, repeat=REPEAT, number=NUMBER) - best_ms = min(times) / NUMBER * 1000 # best total run in ms - per_op_us = min(times) / NUMBER / N * 1_000_000 # per single op in µs - print(f"{name:<10} {best_ms:>10.3f} {per_op_us:>12.4f}") diff --git a/cachebox/__init__.py b/cachebox/__init__.py index 7e80a87..52875e7 100644 --- a/cachebox/__init__.py +++ b/cachebox/__init__.py @@ -1,2 +1,10 @@ from ._core import BaseCacheImpl as BaseCacheImpl from ._core import Cache as Cache +from ._core import FIFOCache as FIFOCache + +try: + from ._core import ( + _fifocache_small_offset as _fifocache_small_offset, # type: ignore + ) +except ImportError: + pass diff --git a/cachebox/_core.pyi b/cachebox/_core.pyi index b105552..1905ecb 100644 --- 
a/cachebox/_core.pyi +++ b/cachebox/_core.pyi @@ -29,21 +29,70 @@ class BaseCacheImpl(typing.Generic[KT, VT]): *, capacity: int = 0, getsizeof: typing.Callable[[KT, VT]] | None = None, - ) -> None: ... + ) -> None: + """ + Initialize a new instance. + + Args: + maxsize: Maximum number of elements the cache can hold. If zero, the limit is set to sys.maxsize internally. + iterable: Initial data to populate the cache. + capacity: Pre-allocate cache capacity to minimize reallocations. Defaults to 0. + getsizeof: A callable that computes the size of a key-value pair. When `None`, each + entry is assumed to have a size of 1 (equivalent to `lambda k, v: 1`). + Use this to implement weighted caching — for example, sizing entries by + memory footprint or byte length. + + The cache can be pre-sized via `capacity` to reduce reallocations when + the number of expected entries is known ahead of time. + """ + ... + @property - def maxsize(self) -> int: ... + def maxsize(self) -> int: + """Returns the specified `maxsize`""" + ... + @property - def getsizeof(self) -> typing.Callable[[KT, VT]] | None: ... - def current_size(self) -> int: ... - def remaining_size(self) -> int: ... - def capacity(self) -> int: ... - def __len__(self) -> int: ... + def getsizeof(self) -> typing.Callable[[KT, VT]] | None: + """Returns the `getsizeof` function""" + ... + + def current_size(self) -> int: + """Returns the current total cumulative size consumed by all stored entries.""" + ... + + def remaining_size(self) -> int: + """Returns the remaining size. Equals to `maxsize - current_size`""" + ... + + def capacity(self) -> int: + """Returns the number of elements the map can hold without reallocating.""" + ... + + def __len__(self) -> int: + """Returns the number of entries currently in the cache.""" + ... + def __sizeof__(self) -> int: ... def __bool__(self) -> bool: ... def __contains__(self, key: KT) -> bool: ... - def contains(self, key: KT) -> bool: ... - def is_empty(self) -> bool: ... 
- def is_full(self) -> bool: ... + def contains(self, key: KT) -> bool: + """ + Returns `true` if the cache contains an entry for `key`. Equals to `key in self`. + + It's recommended to use this method instead of `key in self`, as it keeps code + compatible across different cache policies. + """ + ... + + def is_empty(self) -> bool: + """Returns `True` if cache is empty. Exactly like `bool(self)`.""" + ... + + def is_full(self) -> bool: + """Returns `True` when the cumulative size has reached the maxsize limit.""" + ... + def insert( self, key: KT, value: VT, *args: typing.Any, **kwargs: typing.Any ) -> typing.Optional[VT]: ... @@ -65,12 +114,34 @@ class BaseCacheImpl(typing.Generic[KT, VT]): *args: typing.Any, **kwargs: typing.Any, ) -> typing.Optional[VT | DT]: ... - def pop(self, key: KT, default: DT = ...) -> typing.Union[VT, DT]: ... + def pop(self, key: KT, default: DT = ...) -> typing.Union[VT, DT]: + """ + Removes specified key and returns the corresponding value. + + If the key is not found, returns the `default` if given; otherwise, raise a KeyError. + """ + ... + def __delitem__(self, key: KT) -> None: ... def popitem(self) -> typing.Tuple[KT, VT]: ... - def drain(self, n: int) -> int: ... - def shrink_to_fit(self) -> None: ... - def clear(self, *, reuse: bool = False) -> None: ... + def drain(self, n: int) -> int: + """ + Calls the `popitem()` `n` times and returns count of removed items. + """ + ... + + def shrink_to_fit(self) -> None: + """Shrinks the internal allocation as close to the current length as possible.""" + ... + + def clear(self, *, reuse: bool = False) -> None: + """ + Removes all items from cache. + + If `reuse` is True, will not free the memory for reusing in the future. + """ + ... + def __eq__(self, other: typing.Any) -> bool: ... def __ne__(self, other: typing.Any) -> bool: ... def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: ... @@ -121,74 +192,6 @@ class Cache(BaseCacheImpl[KT, VT]): memory pressure relief. 
""" - def __init__( - self, - maxsize: int, - iterable: _IterableType[KT, VT] | None = None, - *, - capacity: int = ..., - getsizeof: typing.Callable[[KT, VT]] | None = ..., - ) -> None: - """ - Initialize a new Cache instance. - - Args: - maxsize: Maximum number of elements the cache can hold. If zero, the limit is set to sys.maxsize internally. - iterable: Initial data to populate the cache. - capacity: Pre-allocate hash table capacity to minimize reallocations. Defaults to 0. - getsizeof: A callable that computes the size of a key-value pair. When `None`, each - entry is assumed to have a size of 1 (equivalent to `lambda k, v: 1`). - Use this to implement weighted caching — for example, sizing entries by - memory footprint or byte length. - - The cache can be pre-sized via `capacity` to reduce hash table reallocations when - the number of expected entries is known ahead of time. - """ - ... - - @property - def maxsize(self) -> int: - """Returns the specified `maxsize`""" - ... - - @property - def getsizeof(self) -> typing.Callable[[KT, VT]] | None: - """Returns the `getsizeof` function""" - ... - - def current_size(self) -> int: - """Returns the current total cumulative size consumed by all stored entries.""" - ... - - def remaining_size(self) -> int: - """Returns the remaining size. Equals to `maxsize - current_size`""" - ... - - def capacity(self) -> int: - """Returns the number of elements the map can hold without reallocating.""" - ... - - def __len__(self) -> int: - """Returns the number of entries currently in the cache.""" - ... - - def contains(self, key: KT) -> bool: - """ - Returns `true` if the cache contains an entry for `key`. Equals to `key in self`. - - It's recommended to use this method instead of `key in self`, as it keeps code - compatible across different cache policies. - """ - ... - - def is_empty(self) -> bool: - """Returns `True` if cache is empty. Exactly like `bool(self)`.""" - ... 
- - def is_full(self) -> bool: - """Returns `True` when the cumulative size has reached the maxsize limit.""" - ... - def insert(self, key: KT, value: VT) -> typing.Optional[VT]: """ Equals to `self[key] = value`, but returns a value: @@ -221,13 +224,6 @@ class Cache(BaseCacheImpl[KT, VT]): Returns the value associated with the key if present, otherwise returns the specified default value. Equivalent to `self[key]`, but provides a fallback default if the key is not found. - - Args: - key: The key to look up in the cache. - default: The value to return if the key is not present in the cache. Defaults to None. - - Returns: - The value associated with the key, or the default value if the key is not found. """ ... @@ -235,8 +231,6 @@ class Cache(BaseCacheImpl[KT, VT]): self, key: KT, default: typing.Optional[DT] = None, - *args: typing.Any, - **kwargs: typing.Any, ) -> typing.Optional[VT | DT]: """ Inserts key with a value of default if key is not in the cache. @@ -245,34 +239,10 @@ class Cache(BaseCacheImpl[KT, VT]): """ ... - def pop(self, key: KT, default: DT = ...) -> typing.Union[VT, DT]: - """ - Removes specified key and returns the corresponding value. - - If the key is not found, returns the `default` if given; otherwise, raise a KeyError. - """ - ... - def popitem(self) -> typing.Tuple[KT, VT]: """Always raises `OverflowError` because `Cache` has neither policy nor algorithm to evict items.""" ... - def drain(self, n: int) -> int: - """Calls the `popitem()` `n` times and returns count of removed items.""" - ... - - def shrink_to_fit(self) -> None: - """Shrinks the internal allocation as close to the current length as possible.""" - ... - - def clear(self, *, reuse: bool = False) -> None: - """ - Removes all items from cache. - - If `reuse` is True, will not free the memory for reusing in the future. - """ - ... - def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: """ Returns an iterable object of the cache's items (key-value pairs). 
@@ -302,3 +272,101 @@ class Cache(BaseCacheImpl[KT, VT]): - Values are not ordered. """ ... + +class FIFOCache(BaseCacheImpl[KT, VT]): + """ + A First-In-First-Out (FIFO) cache eviction policy: when the cache is full, the oldest + inserted item is always the first to be removed, regardless of how often it has been accessed. + + ## How It Works + The FIFO algorithm is one of the simplest cache eviction strategies. Items are stored in + insertion order, and when the cache reaches capacity, the item that has been there the + longest is evicted to make room. There is no concept of "recently used" or "frequently used" + - age alone determines eviction order. Conceptually, it behaves like a queue: new items + join the back, and evictions come from the front. + + This implementation backs that queue with a `double-ended queue` for O(1) front removal, + paired with a `hash map` for O(1) key lookups. Rather than storing physical indices into + the deque (which shift every time an item is evicted from the front), the table stores + logical indices - a monotonically increasing counter assigned at insertion time. + A separate `front_offset` counter tracks how many items have ever been evicted; the physical + position of any key is recovered at read time as `entries[table[key] - front_offset]`, + keeping both eviction and lookup O(1) without any per-eviction rewriting of the table. + + ### Pros + - Insert, lookup, and evict are all O(1) amortized: the `front_offset` trick eliminates the O(n) + index-shifting that a naive implementation would require on every eviction. + - Eviction order is fully deterministic: the oldest item always goes first, independent of access + patterns, making behaviour easy to reason about and reproduce in tests. + - No per-read overhead. Unlike LRU, FIFO requires no bookkeeping on cache hits. + + ### Cons + - Access-blind eviction. A hot item accessed thousands of times is evicted just as readily as one + that has never been read.
Hit rates suffer on workloads with strong temporal locality. + - The logical-index indirection adds a layer of internal complexity compared to a naïve queue-based cache. + - The rare O(n) index rebase (triggered when `front_offset` nears `usize::MAX - isize::MAX`) introduces + an occasional latency spike. Amortized cost is negligible, but worst-case latency is unbounded in principle. + + ## When to use it + Reach for `FIFOCache` when: + - Eviction order must be predictable and auditable: streaming pipelines, sequential batch processors, or + any context where deterministic behaviour simplifies debugging. + - Access patterns are roughly uniform, so there is no meaningful "hot" subset of keys that a recency or + frequency-aware policy could exploit. + - Read overhead must be minimal: FIFO's zero-cost hits make it preferable to LRU in insert-heavy workloads + with infrequent re-reads. + + Avoid it when your workload has strong temporal locality. If recently or frequently accessed items are likely + to be needed again soon, an LRU or LFU policy will deliver meaningfully better hit rates. + """ + + def insert(self, key: KT, value: VT) -> typing.Optional[VT]: + """ + Equals to `self[key] = value`, but returns a value: + + - If the cache did not have this key present, None is returned. + - If the cache did have this key present, the value is updated, + and the old value is returned. The key is not updated, though; + + It's recommended to use this method instead of `self[key] = value`, as it keeps code + compatible across different cache policies. + """ + ... + + def update(self, iterable: _IterableType[KT, VT]) -> None: + """ + Updates the cache with elements from a dictionary or an iterable object of key/value pairs. + """ + ... + + def setdefault( + self, + key: KT, + default: typing.Optional[DT] = None, + ) -> typing.Optional[VT | DT]: + """ + Inserts key with a value of default if key is not in the cache.
+ + Returns the value for key if key is in the cache, else default. + """ + ... + + def popitem(self) -> typing.Tuple[KT, VT]: + """ + Removes the element that has been in the cache the longest. + """ + ... + + def first(self, n: int = 0) -> typing.Optional[KT]: + """ + Returns the first key in cache; this is the one which will be removed by `popitem()` (if n == 0). + + By using `n` parameter, you can browse order index by index. + """ + ... + + def last(self) -> typing.Optional[KT]: + """ + Returns the last key in cache. Equals to `self.first(-1)`. + """ + ... diff --git a/src/internal/utils.rs b/src/internal/utils.rs index b24da65..716a424 100644 --- a/src/internal/utils.rs +++ b/src/internal/utils.rs @@ -309,7 +309,8 @@ impl From for alias::PyObject { value.object } } - +/// Holds and manages the `getsizeof` function, a callable used to measure the +/// size of each key-value pair. #[repr(transparent)] pub struct GetsizeofFunction(Option); diff --git a/src/lib.rs b/src/lib.rs index 2f45905..5bf084f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,8 +11,17 @@ pub mod internal; pub mod policies; pub mod pyclasses; +// fn _fifocache_small_offset_attribute( +// m: &pyo3::Bound<'_, pyo3::types::PyModule>, +// ) -> pyo3::PyResult<()> { + +// } + #[pyo3::pymodule] mod _core { + #[allow(unused_imports)] + use pyo3::types::PyModuleMethods; + use crate::typeref; #[pymodule_export] @@ -29,9 +38,16 @@ mod _core { #[pymodule_export] use crate::pyclasses::cache::PyCacheValues; + #[pymodule_export] + use crate::pyclasses::fifocache::PyFIFOCache; + #[pymodule_init] pub fn init(m: &pyo3::Bound<'_, pyo3::types::PyModule>) -> pyo3::PyResult<()> { typeref::initialize_typeref(m.py()); + + #[cfg(feature = "fifocache-small-offset")] + m.add("_fifocache_small_offset", true)?; + Ok(()) } } diff --git a/src/policies/common.rs b/src/policies/common.rs new file mode 100644 index 0000000..ecfc9a6 --- /dev/null +++ b/src/policies/common.rs @@ -0,0 +1,146 @@ +//!
Common implementations across multiple policies + +use crate::internal::alias; +use crate::internal::utils; +use crate::policies::traits; + +/// A key-value pair with a precomputed hash and combined size. +pub struct Handle { + /// The cache key together with its precomputed hash, avoiding repeated + /// Python hash calls during table lookups. + key: utils::PrecomputedHashObject, + /// The cached value associated with this key. + value: alias::PyObject, + /// Size of the key and value as reported by `getsizeof`. + size: usize, +} + +impl Handle { + /// Creates a new [`Handle`], which calculates the precomputed hash itself. + #[inline] + pub fn new( + py: pyo3::Python<'_>, + getsizeof: &utils::GetsizeofFunction, + key: alias::PyObject, + value: alias::PyObject, + ) -> pyo3::PyResult { + Self::with_precomputed_hash_key( + py, + getsizeof, + utils::PrecomputedHashObject::new(py, key)?, + value, + ) + } + + /// Creates a new [`Handle`] from an already-hashed key. + /// + /// Prefer this over [`Handle::new`] when the caller has already paid the cost + /// of computing the hash (e.g. during a table lookup that preceded insertion). + #[inline] + pub fn with_precomputed_hash_key( + py: pyo3::Python<'_>, + getsizeof: &utils::GetsizeofFunction, + key: utils::PrecomputedHashObject, + value: alias::PyObject, + ) -> pyo3::PyResult { + let size = getsizeof.call(py, key.as_ref(), &value)?; + Ok(Self { key, value, size }) + } + + /// Consumes `self` and returns the [`utils::PrecomputedHashObject`]. + #[inline] + pub fn into_key(self) -> utils::PrecomputedHashObject { + self.key + } + + /// Returns a reference to the value. + #[inline] + pub fn value(&self) -> &alias::PyObject { + &self.value + } + + /// Consumes `self` and returns the value of the pair. + #[inline] + pub fn into_value(self) -> alias::PyObject { + self.value + } + + /// Consumes `self` and returns the pair.
+ #[inline] + pub fn into_pair(self) -> (utils::PrecomputedHashObject, alias::PyObject) { + (self.key, self.value) + } + + /// Makes a clone of self. + /// + /// This creates another pointer to the same object, increasing its reference count. + #[inline] + pub fn clone_ref(&self, py: pyo3::Python<'_>) -> Self { + Self { + key: self.key.clone_ref(py), + value: self.value.clone_ref(py), + size: self.size, + } + } +} + +impl traits::HandleExt for Handle { + type Key = utils::PrecomputedHashObject; + + #[inline(always)] + fn key(&self) -> &utils::PrecomputedHashObject { + &self.key + } + + #[inline(always)] + fn size(&self) -> usize { + self.size + } +} + +/// Shared variables which should be kept separate from the Mutex +pub struct Shared { + /// Hard upper bound on `currsize`. + maxsize: std::num::NonZeroUsize, + /// Monotonically incrementing counter bumped on every structural mutation + gv: utils::GenerationVersion, + /// Callable used to measure size of each key-value pair. + getsizeof: utils::GetsizeofFunction, +} + +impl Shared { + /// Creates a new [`Shared`].
+ #[inline] + pub fn new(maxsize: usize, getsizeof: Option) -> Self { + Self { + maxsize: safe_non_zero!(maxsize), + gv: utils::GenerationVersion::default(), + getsizeof: utils::GetsizeofFunction::new(getsizeof), + } + } +} + +impl traits::SharedExt for Shared { + #[inline] + fn maxsize(&self) -> usize { + self.maxsize.get() + } + + #[inline] + fn generation_version(&self) -> &utils::GenerationVersion { + &self.gv + } + + #[inline] + fn getsizeof(&self) -> &utils::GetsizeofFunction { + &self.getsizeof + } + + fn clone_ref(&self, py: pyo3::Python) -> Self { + Self { + maxsize: self.maxsize, + gv: Default::default(), + getsizeof: self.getsizeof.clone_ref(py), + } + } +} diff --git a/src/policies/fifopolicy.rs b/src/policies/fifopolicy.rs new file mode 100644 index 0000000..404184f --- /dev/null +++ b/src/policies/fifopolicy.rs @@ -0,0 +1,427 @@ +use std::collections::VecDeque; + +use crate::hashbrown; +use crate::internal::utils; +use crate::policies::traits; +use crate::policies::traits::HandleExt; +use crate::policies::traits::PolicyExt; +use crate::policies::traits::SharedExt; + +pub use super::common::Handle; +pub use super::common::Shared; + +/// A view into an occupied entry in [`FIFOPolicy`]. +pub struct Occupied<'a> { + /// The parent storage that owns the hash table. + policy: &'a mut FIFOPolicy, + /// The shared configuration + shared: &'a Shared, + /// Raw bucket pointing to the occupied index. 
+ bucket: hashbrown::raw::Bucket, +} + +impl traits::EntryExt for Occupied<'_> { + type Handle = Handle; + type Shared = Shared; + + fn would_exceed(&self, extra_size: usize) -> bool { + let handle = + unsafe { &self.policy.entries[*self.bucket.as_ref() - self.policy.front_offset] }; + + self.policy + .currsize + .saturating_add(extra_size) + .saturating_sub(handle.size()) + > self.shared.maxsize() + } + + fn evict(&mut self, py: pyo3::Python) -> pyo3::PyResult { + self.policy.evict(py, self.shared) + } +} + +impl traits::OccupiedExt for Occupied<'_> { + fn replace(self, new: Self::Handle) -> Self::Handle { + // In update we don't need to increment this; because this does not change the memory address ranges + // self.shared.generation_version().increment(); + + let index = unsafe { *self.bucket.as_ref() }; + let item = &mut self.policy.entries[index - self.policy.front_offset]; + + self.policy.currsize = self + .policy + .currsize + .saturating_sub(item.size()) + .saturating_add(new.size()); + + std::mem::replace(item, new) + } + + fn remove(self) -> Self::Handle { + let (mut index, _) = unsafe { self.policy.table.remove(self.bucket) }; + index -= self.policy.front_offset; + + self.policy + .decrement_indexes(index + 1, self.policy.entries.len()); + + let handle = self.policy.entries.remove(index).unwrap(); + self.policy.currsize = self.policy.currsize.saturating_sub(handle.size()); + handle + } +} + +/// A view into a vacant slot in [`FIFOPolicy`]. +pub struct Vacant<'a> { + /// The parent policy that owns the hash table. 
+ policy: &'a mut FIFOPolicy, + /// The shared configuration + shared: &'a Shared, +} + +impl traits::EntryExt for Vacant<'_> { + type Handle = Handle; + type Shared = Shared; + + fn would_exceed(&self, extra_size: usize) -> bool { + self.policy.currsize.saturating_add(extra_size) > self.shared.maxsize() + } + + fn evict(&mut self, py: pyo3::Python) -> pyo3::PyResult { + self.policy.evict(py, self.shared) + } +} + +impl traits::VacantExt for Vacant<'_> { + fn insert(self, handle: Self::Handle) { + self.shared.generation_version().increment(); + + self.policy.currsize = self.policy.currsize.saturating_add(handle.size()); + + self.policy.table.insert( + handle.key().hash(), + self.policy.entries.len() + self.policy.front_offset, + |index| { + self.policy.entries[(*index) - self.policy.front_offset] + .key() + .hash() + }, + ); + self.policy.entries.push_back(handle); + } +} + +pub struct FIFOPolicy { + /// Maps each key to its logical index into [`FIFOPolicy::entries`], enabling O(1) lookups. + /// + /// Stored indices are *logical* (i.e. they do not reset when entries are popped from the + /// front), so they must be adjusted on read: `entries[table[k] - front_offset]`. + /// As a result, table values grow monotonically over the lifetime of the cache, + /// but their *count* stays bounded by the cache capacity — this is not a memory concern. + table: hashbrown::raw::RawTable, + + /// Insertion-ordered sequence of cached handles, providing O(1) front removal. + entries: VecDeque, + + /// Running total of all stored handles' sizes, maintained incrementally. + currsize: usize, + + /// Number of handles ever popped from the front of [`FIFOPolicy::entries`]. + /// + /// Because [`VecDeque`] indices shift on front-removal, naively keeping + /// [`FIFOPolicy::table`] consistent would require decrementing every stored + /// index — an O(n) operation. 
Instead, this counter is incremented on each + /// pop and subtracted at read time: `entries[table[k] - front_offset]`, + /// keeping both the pop and the lookup O(1). + /// + /// To prevent `usize` overflow in the subtraction, once `front_offset` + /// reaches `usize::MAX - isize::MAX`, all indices in `table` are decremented + /// by the current `front_offset` and the counter is reset to zero. This + /// rewrite is O(n) but occurs so rarely, at most once per + /// `usize::MAX - isize::MAX` evictions, that it is effectively free in practice. + front_offset: usize, +} + +impl FIFOPolicy { + /// Creates a new [`FIFOPolicy`]. + /// + /// The underlying [`VecDeque`] is pre-allocated to hold at least `capacity` entries + /// without reallocation. + pub fn new(capacity: usize) -> Self { + Self { + table: hashbrown::raw::RawTable::with_capacity(capacity), + entries: VecDeque::with_capacity(capacity), + currsize: 0, + front_offset: 0, + } + } + + #[inline] + pub fn table(&self) -> &hashbrown::raw::RawTable { + &self.table + } + + #[inline] + pub fn vecdeque(&self) -> &VecDeque { + &self.entries + } + + #[inline] + fn decrement_indexes(&mut self, start: usize, end: usize) { + #[cfg(not(feature = "fifocache-small-offset"))] + const MAX_FRONT_OFFSET: usize = usize::MAX - isize::MAX as usize; + + #[cfg(feature = "fifocache-small-offset")] + const MAX_FRONT_OFFSET: usize = u8::MAX as usize; + + // Fast path: shifting the entire front is a single counter increment. + // Guard against overflow; the full-normalization path below handles that case. + if start <= 1 && end == self.entries.len() && self.front_offset < MAX_FRONT_OFFSET { + self.front_offset += 1; + return; + } + + // Snapshot so the borrow checker doesn't complain about `self` inside the loops. + let fo = self.front_offset; + + if (end - start) > self.table.num_buckets() / 2 { + // Table-scan path: already O(n), so fold normalization in for free. 
+ // One pass: normalize every index (subtract fo) and decrement those in [start, end). + unsafe { + for bucket in self.table.iter() { + let i = bucket.as_mut(); + let vd_idx = *i - fo; // raw VecDeque index + *i = if start <= vd_idx && vd_idx < end { + vd_idx - 1 // normalize + decrement + } else { + vd_idx // normalize only + }; + } + } + } else { + // Entries-scan path: O(range) decrement pass, then O(n) normalization pass. + // + // Pass 1: decrement the logical indices for entries in [start, end). + let shifted = self.entries.range(start..end); + for (i, entry) in (start..end).zip(shifted) { + let result = unsafe { + self.table + .get_mut(entry.key().hash(), |x| Ok::<_, pyo3::PyErr>((*x) - fo == i)) + .unwrap_unchecked() + }; + *result.expect("index not found") -= 1; + } + + // Pass 2: normalize every stored index by subtracting `fo`. + // • Entries in [start, end): (vd_idx + fo - 1) - fo = vd_idx - 1 + // • All others: (vd_idx + fo) - fo = vd_idx + if fo != 0 { + unsafe { + for bucket in self.table.iter() { + *bucket.as_mut() -= fo; + } + } + } + } + + // Both branches now store raw VecDeque indices, so the offset is zero. + self.front_offset = 0; + } +} + +impl traits::PolicyExt for FIFOPolicy { + type Shared = Shared; + type Handle = Handle; + + type Occupied<'a> + = Occupied<'a> + where + Self: 'a; + + type Vacant<'a> + = Vacant<'a> + where + Self: 'a; + + #[inline] + fn current_size(&self) -> usize { + self.currsize + } + + #[inline] + fn get( + &mut self, + py: pyo3::Python, + key: &::Key, + ) -> pyo3::PyResult> { + let eq = |index: &usize| { + self.entries[(*index) - self.front_offset] + .key() + .py_eq(py, &key) + }; + match self.table.get(key.hash(), eq)? 
{ + Some(index) => Ok(Some(&self.entries[(*index) - self.front_offset])), + None => Ok(None), + } + } + + fn entry<'a>( + &'a mut self, + py: pyo3::Python, + key: &::Key, + shared: &'a Self::Shared, + ) -> pyo3::PyResult, Self::Vacant<'a>>> { + let eq = |index: &usize| { + self.entries[(*index) - self.front_offset] + .key() + .py_eq(py, &key) + }; + match self.table.find(key.hash(), eq)? { + Some(bucket) => { + let result = Occupied { + policy: self, + shared, + bucket, + }; + Ok(traits::PolicyEntry::Occupied(result)) + } + None => { + let result = Vacant { + policy: self, + shared, + }; + Ok(traits::PolicyEntry::Vacant(result)) + } + } + } + + #[inline] + fn evict(&mut self, py: pyo3::Python, shared: &Self::Shared) -> pyo3::PyResult { + let front = self.entries.front(); + if front.is_none() { + return Err(new_py_error!(PyKeyError, ())); + } + + let front = unsafe { front.unwrap_unchecked() }; + + let eq = |index: &usize| { + self.entries[(*index) - self.front_offset] + .key() + .py_eq(py, front.key()) + }; + if std::hint::unlikely(self.table.remove_entry(front.key().hash(), eq)?.is_none()) { + unreachable!("popitem key not found in table"); + } + + shared.generation_version().increment(); + + let front = unsafe { self.entries.pop_front().unwrap_unchecked() }; + + self.currsize = self.currsize.saturating_sub(front.size()); + self.decrement_indexes(1, self.entries.len()); + + Ok(front) + } + + #[inline] + fn shrink_to_fit(&mut self, shared: &Self::Shared) { + // Shrink table + let initial = self.table.capacity(); + self.table.shrink_to(0, |index| { + self.entries[(*index) - self.front_offset].key().hash() + }); + + if initial != self.table.capacity() { + shared.generation_version().increment(); + } + + // Shrink entries + let initial = self.entries.capacity(); + self.entries.shrink_to_fit(); + + if initial != self.entries.capacity() { + shared.generation_version().increment(); + } + } + + #[inline] + fn clear(&mut self, shared: &Self::Shared) { + if 
self.entries.is_empty() { + return; + } + + self.table.clear(); + self.entries.clear(); + shared.generation_version().increment(); + self.currsize = 0; + } + + fn py_eq( + &self, + py: pyo3::Python, + shared: &Self::Shared, + other: &Self, + other_shared: &Self::Shared, + ) -> pyo3::PyResult { + if shared.maxsize() != other_shared.maxsize() || self.table.len() != other.table.len() { + return Ok(false); + } + + let mut error = None; + let result = unsafe { + let mut iterator = self.table.iter().map(|x| x.as_ref()); + + iterator.all(|index_1| { + let handle_1 = &self.entries[(*index_1) - self.front_offset]; + + let result = other.table.get(handle_1.key().hash(), |index| { + handle_1 + .key() + .py_eq(py, other.entries[(*index) - other.front_offset].key()) + }); + + match result { + Err(e) => { + error = Some(e); + // Return false to break the `.all` loop + false + } + Ok(None) => false, + Ok(Some(index_2)) => { + let handle_2 = &other.entries[(*index_2) - other.front_offset]; + + let value_1 = handle_1.value(); + let value_2 = handle_2.value(); + + match utils::pyobject_equal(py, value_1.as_ptr(), value_2.as_ptr()) { + Ok(result) => result, + Err(e) => { + error = Some(e); + // Return false to break the `.all` loop + false + } + } + } + } + }) + }; + + if let Some(error) = error { + return Err(error); + } + Ok(result) + } + + fn clone_ref(&self, py: pyo3::Python<'_>) -> Self { + let mut entries = VecDeque::with_capacity(self.entries.len()); + for handle in self.entries.iter() { + entries.push_back(handle.clone_ref(py)); + } + + Self { + table: self.table.clone(), + entries, + currsize: self.currsize, + front_offset: self.front_offset, + } + } +} diff --git a/src/policies/mod.rs b/src/policies/mod.rs index 767bb21..2be38ae 100644 --- a/src/policies/mod.rs +++ b/src/policies/mod.rs @@ -1,5 +1,7 @@ +pub mod common; pub mod traits; +pub mod fifopolicy; pub mod nopolicy; pub mod wrapped; diff --git a/src/policies/nopolicy.rs b/src/policies/nopolicy.rs index 
ea6a2e3..60de168 100644 --- a/src/policies/nopolicy.rs +++ b/src/policies/nopolicy.rs @@ -1,114 +1,14 @@ -// use std::sync::atomic; - use crate::hashbrown; -use crate::internal::alias; use crate::internal::utils; use crate::policies::traits; +use crate::policies::traits::HandleExt; use crate::policies::traits::PolicyExt; +use crate::policies::traits::SharedExt; -/// A key-value pair with a precomputed hash and combined memory size. -/// -/// The `size` field caches the result of `getsizeof(key) + getsizeof(value)` -/// so that [`NoPolicy`] can maintain an accurate `currsize` budget without -/// re-invoking the Python-side sizing function on every access. -pub struct Handle { - /// The cache key together with its precomputed hash, avoiding repeated - /// Python hash calls during table lookups. - key: utils::PrecomputedHashObject, - /// The cached value associated with this key. - value: alias::PyObject, - /// Combined memory footprint of the key and value as reported by `getsizeof`. - size: usize, -} - -impl Handle { - /// Creates a new [`Handle`], which calculates the precomputed hash itself. - #[inline] - pub fn new( - py: pyo3::Python<'_>, - getsizeof: &utils::GetsizeofFunction, - key: alias::PyObject, - value: alias::PyObject, - ) -> pyo3::PyResult { - Self::with_precomputed_hash_key( - py, - getsizeof, - utils::PrecomputedHashObject::new(py, key)?, - value, - ) - } - - /// Creates a new [`Handle`] from an already-hashed key. - /// - /// Prefer this over [`Handle::new`] when the caller has already paid the cost - /// of computing the hash (e.g. during a table lookup that preceded insertion). - #[inline] - pub fn with_precomputed_hash_key( - py: pyo3::Python<'_>, - getsizeof: &utils::GetsizeofFunction, - key: utils::PrecomputedHashObject, - value: alias::PyObject, - ) -> pyo3::PyResult { - let size = getsizeof.call(py, key.as_ref(), &value)?; - Ok(Self { key, value, size }) - } - - /// Consumes `self` and returns the [`utils::PrecomputedHashObject`]. 
- #[inline] - pub fn into_key(self) -> utils::PrecomputedHashObject { - self.key - } - - /// Returns a reference to the value. - #[inline] - pub fn value(&self) -> &alias::PyObject { - &self.value - } - - /// Consumes `self` and returns the value of the pair. - #[inline] - pub fn into_value(self) -> alias::PyObject { - self.value - } - - /// Consumes `self` and returns the pair. - #[inline] - pub fn into_pair(self) -> (utils::PrecomputedHashObject, alias::PyObject) { - (self.key, self.value) - } - - /// Makes a clone of self. - /// - /// This creates another pointer to the same object, increasing its reference count. - #[inline] - pub fn clone_ref(&self, py: pyo3::Python<'_>) -> Self { - Self { - key: self.key.clone_ref(py), - value: self.value.clone_ref(py), - size: self.size, - } - } -} - -impl traits::HandleExt for Handle { - type Key = utils::PrecomputedHashObject; - - #[inline(always)] - fn key(&self) -> &utils::PrecomputedHashObject { - &self.key - } - - #[inline(always)] - fn size(&self) -> usize { - self.size - } -} +pub use super::common::Handle; +pub use super::common::Shared; /// A view into an occupied entry in [`NoPolicy`]. -/// -/// Holds a mutable reference to the parent policy and a raw bucket pointer -/// to the existing [`Handle`], enabling in-place removal or replacement without -/// an additional lookup. pub struct Occupied<'a> { /// The parent storage that owns the hash table. 
policy: &'a mut NoPolicy, @@ -129,39 +29,35 @@ impl traits::EntryExt for Occupied<'_> { self.policy .currsize .saturating_add(extra_size) - .saturating_sub(handle.size) - > self.shared.maxsize.get() + .saturating_sub(handle.size()) + > self.shared.maxsize() } #[inline(always)] - fn evict(&mut self) -> pyo3::PyResult { - self.policy.evict(self.shared) + fn evict(&mut self, py: pyo3::Python) -> pyo3::PyResult { + self.policy.evict(py, self.shared) } } impl traits::OccupiedExt for Occupied<'_> { fn remove(self) -> Self::Handle { - let (h, _) = unsafe { self.policy.table.remove(self.bucket) }; - - self.policy.currsize = self.policy.currsize.saturating_sub(h.size); - self.shared.gv.increment(); + self.shared.generation_version().increment(); + let (h, _) = unsafe { self.policy.table.remove(self.bucket) }; + self.policy.currsize = self.policy.currsize.saturating_sub(h.size()); h } fn replace(self, new: Self::Handle) -> Self::Handle { - self.policy.currsize = self.policy.currsize.saturating_add(new.size); + self.policy.currsize = self.policy.currsize.saturating_add(new.size()); let old = unsafe { std::mem::replace(self.bucket.as_mut(), new) }; - self.policy.currsize = self.policy.currsize.saturating_sub(old.size); + self.policy.currsize = self.policy.currsize.saturating_sub(old.size()); old } } /// A view into a vacant slot in [`NoPolicy`]. -/// -/// Holds a mutable reference to the parent policy, allowing a new [`Handle`] -/// to be inserted into the pre-located empty slot without a second lookup. pub struct Vacant<'a> { /// The parent policy that owns the hash table. 
policy: &'a mut NoPolicy, @@ -178,85 +74,31 @@ impl traits::EntryExt for Vacant<'_> { #[inline] fn would_exceed(&self, extra_size: usize) -> bool { - self.policy.currsize.saturating_add(extra_size) > self.shared.maxsize.get() + self.policy.currsize.saturating_add(extra_size) > self.shared.maxsize() } #[inline(always)] - fn evict(&mut self) -> pyo3::PyResult { - self.policy.evict(self.shared) + fn evict(&mut self, py: pyo3::Python) -> pyo3::PyResult { + self.policy.evict(py, self.shared) } } impl traits::VacantExt for Vacant<'_> { fn insert(self, handle: Self::Handle) { - self.policy.currsize = self.policy.currsize.saturating_add(handle.size); + self.shared.generation_version().increment(); + self.policy.currsize = self.policy.currsize.saturating_add(handle.size()); if !self.space_available { - self.policy.table.reserve(1, |x| x.key.hash()); + self.policy.table.reserve(1, |x| x.key().hash()); } unsafe { - self.policy.table.insert_no_grow(handle.key.hash(), handle); - } - - self.shared.gv.increment(); - } -} - -pub struct Shared { - // Hard upper bound on `currsize`. Stored as [`NonZeroUsize`](std::num::NonZeroUsize) - /// so the compiler can elide a zero-check branch in division/comparison hot paths. - maxsize: std::num::NonZeroUsize, - /// Monotonically incrementing counter bumped on every structural mutation - /// (insert, remove, clear, shrink). Used to detect iterator invalidation. - gv: utils::GenerationVersion, - /// Callable used to measure the memory footprint of each key-value pair. - getsizeof: utils::GetsizeofFunction, -} - -impl Shared { - /// Creates a new [`NoPolicy`]. 
- #[inline] - pub fn new(maxsize: usize, getsizeof: Option) -> Self { - Self { - maxsize: safe_non_zero!(maxsize), - // currsize: atomic::AtomicUsize::new(0), - gv: utils::GenerationVersion::default(), - getsizeof: utils::GetsizeofFunction::new(getsizeof), - } - } -} - -impl traits::SharedExt for Shared { - #[inline] - fn maxsize(&self) -> usize { - self.maxsize.get() - } - - #[inline] - fn generation_version(&self) -> utils::GenerationVersion { - self.gv.clone() - } - - #[inline] - fn getsizeof(&self) -> &utils::GetsizeofFunction { - &self.getsizeof - } - - fn clone_ref(&self, py: pyo3::Python) -> Self { - Self { - maxsize: self.maxsize, - gv: Default::default(), - getsizeof: self.getsizeof.clone_ref(py), + self.policy + .table + .insert_no_grow(handle.key().hash(), handle); } } } -/// A cache policy that performs **no eviction**. -/// -/// Insertions are rejected once `currsize` would exceed `maxsize`; the caller -/// must free space manually or accept the refusal. This is useful when the -/// eviction strategy is handled externally, or when a hard size cap with no -/// silent data loss is desired. pub struct NoPolicy { /// The raw hash table storing all live [`Handle`] entries. table: hashbrown::raw::RawTable, @@ -307,9 +149,8 @@ impl traits::PolicyExt for NoPolicy { &mut self, py: pyo3::Python, key: &::Key, - _shared: &Self::Shared, ) -> pyo3::PyResult> { - let bucket = self.table.find(key.hash(), |x| key.py_eq(py, &x.key))?; + let bucket = self.table.find(key.hash(), |x| key.py_eq(py, x.key()))?; Ok(bucket.map(|x| unsafe { x.as_ref() })) } @@ -319,7 +160,7 @@ impl traits::PolicyExt for NoPolicy { key: &::Key, shared: &'a Self::Shared, ) -> pyo3::PyResult, Self::Vacant<'a>>> { - match self.table.find(key.hash(), |x| key.py_eq(py, &x.key))? { + match self.table.find(key.hash(), |x| key.py_eq(py, x.key()))? 
{ Some(bucket) => { let result = Occupied { policy: self, @@ -340,7 +181,7 @@ impl traits::PolicyExt for NoPolicy { } #[inline] - fn evict(&mut self, _shared: &Self::Shared) -> pyo3::PyResult { + fn evict(&mut self, _py: pyo3::Python, _shared: &Self::Shared) -> pyo3::PyResult { Err(new_py_error!( PyOverflowError, "The cache has no algorithm to evict items" @@ -350,10 +191,10 @@ impl traits::PolicyExt for NoPolicy { #[inline] fn shrink_to_fit(&mut self, shared: &Self::Shared) { let initial = self.table.capacity(); - self.table.shrink_to(0, |x| x.key.hash()); + self.table.shrink_to(0, |x| x.key().hash()); if initial != self.table.capacity() { - shared.gv.increment(); + shared.generation_version().increment(); } } @@ -363,7 +204,7 @@ impl traits::PolicyExt for NoPolicy { return; } self.table.clear(); - shared.gv.increment(); + shared.generation_version().increment(); self.currsize = 0; } @@ -374,9 +215,7 @@ impl traits::PolicyExt for NoPolicy { other: &Self, other_shared: &Self::Shared, ) -> pyo3::PyResult { - if shared.maxsize.get() != other_shared.maxsize.get() - || self.table.len() != other.table.len() - { + if shared.maxsize() != other_shared.maxsize() || self.table.len() != other.table.len() { return Ok(false); } @@ -387,7 +226,7 @@ impl traits::PolicyExt for NoPolicy { iterator.all(|handle_1| { let result = other .table - .get(handle_1.key.hash(), |x| handle_1.key.py_eq(py, &x.key)); + .get(handle_1.key().hash(), |x| handle_1.key().py_eq(py, x.key())); match result { Err(e) => { @@ -424,7 +263,7 @@ impl traits::PolicyExt for NoPolicy { unsafe { for handle in self.table.iter().map(|x| x.as_ref()) { - table.insert_no_grow(handle.key.hash(), handle.clone_ref(py)); + table.insert_no_grow(handle.key().hash(), handle.clone_ref(py)); } } diff --git a/src/policies/traits.rs b/src/policies/traits.rs index 917789b..a3aa8b4 100644 --- a/src/policies/traits.rs +++ b/src/policies/traits.rs @@ -21,8 +21,7 @@ pub trait EntryExt { type Shared: SharedExt; type Handle: HandleExt; 
- /// Returns `true` if adding `extra_size` would meet or exceed - /// [`PolicyExt::weight_limit`]. + /// Returns `true` if adding `extra_size` would exceed [`SharedExt::maxsize`]. /// /// Call this *before* [`OccupiedExt::replace`] or [`VacantExt::insert`]. fn would_exceed(&self, extra_size: usize) -> bool; @@ -32,14 +31,14 @@ pub trait EntryExt { /// # Errors /// /// Returns any Python exception raised while dropping the evicted value. - fn evict(&mut self) -> pyo3::PyResult; + fn evict(&mut self, py: pyo3::Python) -> pyo3::PyResult; } /// Guard for an *occupied* slot. pub trait OccupiedExt: EntryExt { /// Replaces the current handle with `new`, returning the old one. /// - /// Does **not** enforce the weight budget; call + /// Does **not** enforce maxsize; call /// [`would_exceed`](EntryExt::would_exceed) first. fn replace(self, new: Self::Handle) -> Self::Handle; @@ -67,7 +66,7 @@ pub trait SharedExt: Send + Sync { fn maxsize(&self) -> usize; /// Returns the generation version. - fn generation_version(&self) -> utils::GenerationVersion; + fn generation_version(&self) -> &utils::GenerationVersion; /// Returns a reference to configued getsizeof function. fn getsizeof(&self) -> &utils::GetsizeofFunction; @@ -102,7 +101,6 @@ pub trait PolicyExt { &mut self, py: pyo3::Python, key: &::Key, - shared: &Self::Shared, ) -> pyo3::PyResult>; /// Returns a [`PolicyEntry`] for the slot at `hash` / `eq`. @@ -126,7 +124,7 @@ pub trait PolicyExt { /// # Panics /// /// May panic if the policy is empty. - fn evict(&mut self, py: pyo3::Python, shared: &Self::Shared) -> pyo3::PyResult; /// Removes all handles without shrinking the allocation.
fn clear(&mut self, shared: &Self::Shared); diff --git a/src/policies/wrapped.rs b/src/policies/wrapped.rs index 96f4fdc..dcee788 100644 --- a/src/policies/wrapped.rs +++ b/src/policies/wrapped.rs @@ -63,7 +63,7 @@ fn insert_inner( PolicyEntry::Occupied(mut occupied) => { // Evict if need while occupied.would_exceed(handle.size()) { - occupied.evict()?; + occupied.evict(py)?; } Ok(Some(occupied.replace(handle))) @@ -71,7 +71,7 @@ fn insert_inner( PolicyEntry::Vacant(mut vacant) => { // Evict if need while vacant.would_exceed(handle.size()) { - vacant.evict()?; + vacant.evict(py)?; } vacant.insert(handle); @@ -98,7 +98,7 @@ impl Wrapped

{ ) -> pyo3::PyResult { let mut lock = self.inner.lock(); - let handle = lock.get(py, key, &self.shared)?; + let handle = lock.get(py, key)?; Ok(handle.is_some()) } @@ -215,7 +215,7 @@ impl Wrapped

{ let mut count: pyo3::ffi::Py_ssize_t = 0; while count < n { - match lock.evict(&self.shared) { + match lock.evict(py, &self.shared) { Ok(_) => {} Err(err) => { if !err.is_instance_of::(py) { diff --git a/src/pyclasses/cache.rs b/src/pyclasses/cache.rs index 1516345..fb6bcdd 100644 --- a/src/pyclasses/cache.rs +++ b/src/pyclasses/cache.rs @@ -23,18 +23,18 @@ implement_pyclass! { /// frequency counters, or expiry timestamps. /// /// ### Pros - /// - Minimal overhead - no bookkeeping for eviction means lower CPU and memory usage per entry compared + /// - Minimal overhead: no bookkeeping for eviction means lower CPU and memory usage per entry compared /// to policy-based caches. - /// - Predictable behavior - items are never silently removed, so cache hits are deterministic once an + /// - Predictable behavior: items are never silently removed, so cache hits are deterministic once an /// item is stored. - /// - Thread-safe - safe for concurrent reads and writes out of the box. - /// - Configurable capacity - a hard size limit prevents unbounded memory growth. + /// - Thread-safe: safe for concurrent reads and writes out of the box. + /// - Configurable capacity: a hard size limit prevents unbounded memory growth. /// /// ### Cons - /// - No automatic eviction - the cache can fill up and stop accepting new entries if a max size is set, + /// - No automatic eviction: the cache can fill up and stop accepting new entries if a max size is set, /// requiring manual management. - /// - Unordered - unlike a standard dict (Python 3.7+), insertion order is not preserved. - /// - Not suitable for volatile data - stale entries persist forever unless explicitly invalidated. + /// - Unordered: unlike a standard dict (Python 3.7+), insertion order is not preserved. + /// - Not suitable for volatile data: stale entries persist forever unless explicitly invalidated. 
/// /// ## When to Use It /// `Cache` is the right choice when: @@ -64,7 +64,7 @@ impl PyCache { ) } - /// Initialize a new Cache instance. + /// Initialize a new `Cache` instance. /// /// Args: /// maxsize: Maximum number of elements the cache can hold. Zero means unlimited. @@ -109,22 +109,26 @@ impl PyCache { } #[getter] + #[inline] fn maxsize(&self) -> usize { let inner = self.0.get(); inner.shared().maxsize() } + #[inline] fn current_size(&self) -> usize { let inner = self.0.get(); inner.policy().current_size() } + #[inline] fn remaining_size(&self) -> usize { let inner = self.0.get(); inner.remaining_size() } #[getter] + #[inline] fn getsizeof(&self, py: pyo3::Python) -> Option { let inner = self.0.get(); inner.shared().getsizeof().clone_ref(py).into() @@ -200,7 +204,7 @@ impl PyCache { /// /// - If the cache did not have this key present, None is returned. /// - If the cache did have this key present, the value is updated, - /// and the old value is returned. The key is not updated, though; + /// and the old value is returned. The key is not updated, though. /// /// Note: raises `OverflowError` if the cache reached the maxsize limit, /// because this class does not have any algorithm. @@ -272,7 +276,7 @@ impl PyCache { let inner = self.0.get(); let mut policy = inner.policy(); - if let Some(x) = policy.get(py, &key, inner.shared())? { + if let Some(x) = policy.get(py, &key)? { return Ok(x.value().clone_ref(py)); } @@ -295,7 +299,7 @@ impl PyCache { let inner = self.0.get(); let mut policy = inner.policy(); - match policy.get(py, &key, inner.shared())? { + match policy.get(py, &key)? { Some(x) => Ok(x.value().clone_ref(py)), None => Err(new_py_error!( PyKeyError, @@ -323,7 +327,7 @@ impl PyCache { let shared = inner.shared(); let mut policy = inner.policy(); - if let Some(x) = policy.get(py, &key, shared)? { + if let Some(x) = policy.get(py, &key)? 
{ return Ok(x.value().clone_ref(py)); } drop(policy); @@ -390,11 +394,11 @@ impl PyCache { /// Remove and return a (key, value) pair as a 2-tuple. /// /// NOTE: `Cache` always raises `NotImplementedError` because has neither policy nor algorithm to evict items. - fn popitem(&self) -> pyo3::PyResult<(alias::PyObject, alias::PyObject)> { + fn popitem(&self, py: pyo3::Python) -> pyo3::PyResult<(alias::PyObject, alias::PyObject)> { let inner = self.0.get(); let mut policy = inner.policy(); - let handle = policy.evict(inner.shared())?; + let handle = policy.evict(py, inner.shared())?; drop(policy); let (key, val) = handle.into_pair(); @@ -484,7 +488,7 @@ impl PyCache { fn items(&self, py: pyo3::Python) -> pyo3::PyResult> { let inner = self.0.get(); - let gv = inner.shared().generation_version(); + let gv = inner.shared().generation_version().clone(); let initial_gv = gv.get(); // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] @@ -498,7 +502,7 @@ impl PyCache { fn values(&self, py: pyo3::Python) -> pyo3::PyResult> { let inner = self.0.get(); - let gv = inner.shared().generation_version(); + let gv = inner.shared().generation_version().clone(); let initial_gv = gv.get(); // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] @@ -512,7 +516,7 @@ impl PyCache { fn keys(&self, py: pyo3::Python) -> pyo3::PyResult> { let inner = self.0.get(); - let gv = inner.shared().generation_version(); + let gv = inner.shared().generation_version().clone(); let initial_gv = gv.get(); // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] diff --git a/src/pyclasses/fifocache.rs b/src/pyclasses/fifocache.rs new file mode 100644 index 0000000..b615acd --- /dev/null +++ b/src/pyclasses/fifocache.rs @@ -0,0 +1,615 @@ +use crate::internal::alias; +use crate::internal::onceinit; +use crate::internal::utils; +use crate::policies::fifopolicy; +use 
crate::policies::traits::HandleExt; +use crate::policies::traits::PolicyExt; +use crate::policies::traits::SharedExt; +use crate::policies::wrapped::Wrapped; + +implement_pyclass! { + /// A First-In-First-Out (FIFO) cache eviction policy: when the cache is full, the oldest + /// inserted item is always the first to be removed, regardless of how often it has been accessed. + /// + /// ## How It Works + /// The FIFO algorithm is one of the simplest cache eviction strategies. Items are stored in + /// insertion order, and when the cache reaches capacity, the item that has been there the + /// longest is evicted to make room. There is no concept of "recently used" or "frequently used" + /// - age alone determines eviction order. Conceptually, it behaves like a queue: new items + /// join the back, and evictions come from the front. + /// + /// This implementation backs that queue with a `double-ended queue` for O(1) front removal, + /// paired with a `hash map` for O(1) key lookups. Rather than storing physical indices into + /// the deque (which shift every time an item is evicted from the front), the table stores + /// logical indices - a monotonically increasing counter assigned at insertion time. + /// A separate `front_offset` counter tracks how many items have ever been evicted; the physical + /// position of any key is recovered at read time as `entries[table[key] - front_offset]`, + /// keeping both eviction and lookup O(1) without any per-eviction rewriting of the table. + /// + /// ### Pros + /// - Insert, lookup, and evict are all O(1) amortized: the `front_offset` trick eliminates the O(n) + /// index-shifting that a native implementation would require on every eviction. + /// - Eviction order is fully deterministic: the oldest item always goes first, independent of access + /// patterns, making behaviour easy to reason about and reproduce in tests. + /// - No per-read overhead. Unlike LRU, FIFO requires no bookkeeping on cache hits. 
+ /// + /// ### Cons + /// - Access-blind eviction. A hot item accessed thousands of times is evicted just as readily as one + /// that has never been read. Hit rates suffer on workloads with strong temporal locality. + /// - The logical-index indirection adds a layer of internal complexity compared to a naïve queue-based cache. + /// - The rare O(n) index rebase (triggered when `front_offset` nears `usize::MAX - isize::MAX`) introduces + /// an occasional latency spike. Amortized cost is negligible, but worst-case latency is unbounded in principle. + /// + /// ## When to use it + /// Reach for `FIFOPolicy` when: + /// - Eviction order must be predictable and auditable: streaming pipelines, sequential batch processors, or + /// any context where deterministic behaviour simplifies debugging. + /// - Access patterns are roughly uniform, so there is no meaningful "hot" subset of keys that a recency or + /// frequency-aware policy could exploit. + /// - Read overhead must be minimal: FIFO's zero-cost hits make it preferable to LRU in insert-heavy workloads + /// with infrequent re-reads. + /// + /// Avoid it when your workload has strong temporal locality. If recently or frequently accessed items are likely + /// to be needed again soon, an LRU or LFU policy will deliver meaningfully better hit rates. + [subclass, extends=crate::pyclasses::base::PyBaseCacheImpl, generic, frozen] + PyFIFOCache as "FIFOCache" (onceinit::OnceInit>); +} + +#[pyo3::pymethods] +impl PyFIFOCache { + #[new] + #[allow(unused_variables)] + #[pyo3(signature=(*args, **kwds))] + fn __new__( + args: alias::ArgsType, + kwds: Option, + ) -> (Self, crate::pyclasses::base::PyBaseCacheImpl) { + ( + Self(onceinit::OnceInit::uninit()), + crate::pyclasses::base::PyBaseCacheImpl, + ) + } + + /// Initialize a new `FIFOCache` instance. + /// + /// Args: + /// maxsize: Maximum number of elements the cache can hold. Zero means unlimited. + /// iterable: Initial data to populate the cache. 
+ /// capacity: Pre-allocate capacity to minimize reallocations. Defaults to 0. + /// getsizeof: A callable that computes the size of a key-value pair. When `None`, each + /// entry is assumed to have a size of 1 (equivalent to `lambda k, v: 1`). + /// Use this to implement weighted caching — for example, sizing entries by + /// memory footprint or byte length. + /// + /// The cache can be pre-sized via `capacity` to reduce hash table reallocations when + /// the number of expected entries is known ahead of time. + #[pyo3(signature=(maxsize, iterable=None, *, capacity=0, getsizeof=None))] + fn __init__( + &self, + py: pyo3::Python, + maxsize: usize, + iterable: Option, + capacity: usize, + getsizeof: Option, + ) -> pyo3::PyResult<()> { + let wrapped = Wrapped::new( + fifopolicy::FIFOPolicy::new(capacity), + fifopolicy::Shared::new(maxsize, getsizeof), + ); + + if let Some(iterable) = iterable { + let getsizeof = wrapped.shared().getsizeof().clone_ref(py); + + let result = wrapped.extend( + // iterable object + iterable, + // transform function + |key, value| fifopolicy::Handle::new(py, &getsizeof, key, value), + ); + self.0.set(wrapped); + result + } else { + self.0.set(wrapped); + Ok(()) + } + } + + #[getter] + #[inline] + fn maxsize(&self) -> usize { + let inner = self.0.get(); + inner.shared().maxsize() + } + + #[inline] + fn current_size(&self) -> usize { + let inner = self.0.get(); + inner.policy().current_size() + } + + #[inline] + fn remaining_size(&self) -> usize { + let inner = self.0.get(); + inner.remaining_size() + } + + #[getter] + #[inline] + fn getsizeof(&self, py: pyo3::Python) -> Option { + let inner = self.0.get(); + inner.shared().getsizeof().clone_ref(py).into() + } + + /// Returns the number of elements the map can hold without reallocating. 
+ #[inline] + fn capacity(&self) -> usize { + let inner = self.0.get(); + let policy = inner.policy(); + + policy.table().capacity().min(policy.vecdeque().capacity()) + } + + /// Returns the number of entries currently in the cache. + #[inline] + fn __len__(&self) -> usize { + let inner = self.0.get(); + let policy = inner.policy(); + + debug_assert!(policy.table().len() == policy.vecdeque().len()); + policy.table().len() + } + + #[inline] + fn __sizeof__(&self) -> usize { + let inner = self.0.get(); + let policy = inner.policy(); + + let table_cap = policy.table().capacity() * std::mem::size_of::(); + let vecdeque_cap = policy.vecdeque().capacity() * std::mem::size_of::(); + table_cap + vecdeque_cap + } + + #[inline] + fn __bool__(&self) -> bool { + let inner = self.0.get(); + let policy = inner.policy(); + + !policy.table().is_empty() + } + + #[inline] + fn __contains__(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult { + self.contains(py, key) + } + + /// Returns `true` if the cache contains an entry for `key`. + #[inline] + fn contains(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + let inner = self.0.get(); + inner.contains(py, &key) + } + + /// Returns `True` if cache is empty. + #[inline] + fn is_empty(&self) -> bool { + let inner = self.0.get(); + let policy = inner.policy(); + + policy.table().is_empty() + } + + /// Returns `True` when the cumulative size has reached the maxsize limit. + #[inline] + fn is_full(&self) -> bool { + let inner = self.0.get(); + let shared = inner.shared(); + let policy = inner.policy(); + + policy.current_size() >= shared.maxsize() + } + + /// Equals to `self[key] = value`, but returns a value: + /// + /// - If the cache did not have this key present, None is returned. + /// - If the cache did have this key present, the value is updated, + /// and the old value is returned. The key is not updated, though. 
+ fn insert( + &self, + py: pyo3::Python, + key: alias::PyObject, + value: alias::PyObject, + ) -> pyo3::PyResult> { + let inner = self.0.get(); + let handle = fifopolicy::Handle::new(py, inner.shared().getsizeof(), key, value)?; + + let old_handle = inner.insert(py, handle)?.map(|x| x.into_value()); + Ok(old_handle) + } + + /// Updates the cache with elements from a dictionary or an iterable object of key/value pairs. + fn update( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + iterable: alias::PyObject, + ) -> pyo3::PyResult<()> { + if std::ptr::eq(slf.as_ptr(), iterable.as_ptr()) { + return Ok(()); + } + + let inner = slf.0.get(); + let getsizeof = inner.shared().getsizeof().clone_ref(py); + + inner.extend( + // iterable object + iterable.into_bound(py), + // transform function + move |key, value| fifopolicy::Handle::new(py, &getsizeof, key, value), + ) + } + + #[inline] + fn __setitem__( + &self, + py: pyo3::Python, + key: alias::PyObject, + value: alias::PyObject, + ) -> pyo3::PyResult<()> { + self.insert(py, key, value)?; + Ok(()) + } + + /// Retrieves the value for a given key from the cache. + /// + /// Returns the value associated with the key if present, otherwise returns the specified default value. + /// Equivalent to `self[key]`, but provides a fallback default if the key is not found. + /// + /// Args: + /// key: The key to look up in the cache. + /// default: The value to return if the key is not present in the cache. Defaults to None. + /// + /// Returns: + /// The value associated with the key, or the default value if the key is not found. + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn get<'p>( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument<'p>, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let mut policy = inner.policy(); + + if let Some(x) = policy.get(py, &key)? 
{ + return Ok(x.value().clone_ref(py)); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok(x.unbind()), + utils::OptionalArgument::Undefined => unsafe { + // SAFETY: None is immortal, so reference counting has no meaning + Ok(pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind()) + }, + } + } + + fn __getitem__( + &self, + py: pyo3::Python, + key: alias::PyObject, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let mut policy = inner.policy(); + + match policy.get(py, &key)? { + Some(x) => Ok(x.value().clone_ref(py)), + None => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + /// Inserts key with a value of default if key is not in the cache. + /// + /// Returns the value for key if key is in the cache, else default. + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn setdefault( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult { + // 1. Try to get value + // 2. If exists -> return it + // 3. Else -> insert default -> return default + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let shared = inner.shared(); + let mut policy = inner.policy(); + + if let Some(x) = policy.get(py, &key)? { + return Ok(x.value().clone_ref(py)); + } + drop(policy); + + let default_object = match default { + utils::OptionalArgument::Defined(x) => x.unbind(), + utils::OptionalArgument::Undefined => unsafe { + // SAFETY: None is immortal, so reference counting has no meaning + pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind() + }, + }; + + let handle = fifopolicy::Handle::with_precomputed_hash_key( + py, + shared.getsizeof(), + key, + default_object.clone_ref(py), + )?; + + inner.insert(py, handle)?; + Ok(default_object) + } + + /// Removes specified key and returns the corresponding value. 
+ /// + /// If the key is not found, returns the `default` if given; otherwise, raise a KeyError. + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn pop( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + + if let Some(x) = inner.remove(py, &key)? { + return Ok(x.into_value()); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok(x.unbind()), + utils::OptionalArgument::Undefined => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + fn __delitem__(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult<()> { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + match inner.remove(py, &key)? { + Some(_) => Ok(()), + None => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + /// Remove and return a (key, value) pair as a 2-tuple. + fn popitem(&self, py: pyo3::Python) -> pyo3::PyResult<(alias::PyObject, alias::PyObject)> { + let inner = self.0.get(); + let mut policy = inner.policy(); + + let handle = policy.evict(py, inner.shared())?; + drop(policy); + + let (key, val) = handle.into_pair(); + Ok((key.into(), val)) + } + + /// Calls the `popitem()` `n` times and returns count of removed items. + #[inline] + fn drain( + &self, + py: pyo3::Python, + n: pyo3::ffi::Py_ssize_t, + ) -> pyo3::PyResult { + let inner = self.0.get(); + inner.drain(py, n) + } + + /// Shrinks the internal allocation as close to the current length as possible. + #[inline] + fn shrink_to_fit(&self) { + let inner = self.0.get(); + let mut policy = inner.policy(); + policy.shrink_to_fit(inner.shared()); + } + + /// Removes all entries from the table and resets the cumulative size to zero. 
+ #[pyo3(signature=(*, reuse=false))] + fn clear(&self, reuse: bool) { + let inner = self.0.get(); + let shared = inner.shared(); + let mut policy = inner.policy(); + + policy.clear(shared); + + if !reuse { + policy.shrink_to_fit(shared); + } + } + + fn __eq__( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + other: pyo3::PyRef<'_, Self>, + ) -> pyo3::PyResult { + if std::ptr::eq(slf.as_ptr(), other.as_ptr()) { + return Ok(true); + } + + let self_inner = slf.0.get(); + let other_inner = other.0.get(); + + let self_policy = self_inner.policy(); + let other_policy = other_inner.policy(); + + self_policy.py_eq( + py, + self_inner.shared(), + &*other_policy, + other_inner.shared(), + ) + } + + fn __ne__( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + other: pyo3::PyRef<'_, Self>, + ) -> pyo3::PyResult { + if std::ptr::eq(slf.as_ptr(), other.as_ptr()) { + return Ok(false); + } + + let self_inner = slf.0.get(); + let other_inner = other.0.get(); + + let self_policy = self_inner.policy(); + let other_policy = other_inner.policy(); + + self_policy + .py_eq( + py, + self_inner.shared(), + &*other_policy, + other_inner.shared(), + ) + .map(|x| !x) + } + + // fn items(&self, py: pyo3::Python) -> pyo3::PyResult> { + // let inner = self.0.get(); + // let gv = inner.shared().generation_version().clone(); + // let initial_gv = gv.get(); + + // // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] + // let result = PyCacheItems { + // iter: parking_lot::Mutex::new(unsafe { inner.policy().table().iter() }), + // gv, + // initial_gv, + // }; + // pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + // } + + // fn values(&self, py: pyo3::Python) -> pyo3::PyResult> { + // let inner = self.0.get(); + // let gv = inner.shared().generation_version().clone(); + // let initial_gv = gv.get(); + + // // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] + // let result = 
PyCacheValues { + // iter: parking_lot::Mutex::new(unsafe { inner.policy().table().iter() }), + // gv, + // initial_gv, + // }; + // pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + // } + + // fn keys(&self, py: pyo3::Python) -> pyo3::PyResult> { + // let inner = self.0.get(); + // let gv = inner.shared().generation_version().clone(); + // let initial_gv = gv.get(); + + // // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] + // let result = PyCacheKeys { + // iter: parking_lot::Mutex::new(unsafe { inner.policy().table().iter() }), + // gv, + // initial_gv, + // }; + // pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + // } + + // #[inline] + // fn __iter__(&self, py: pyo3::Python) -> pyo3::PyResult> { + // self.keys(py) + // } + + fn copy(&self, py: pyo3::Python) -> pyo3::PyResult> { + let inner = self.0.get(); + let cloned = inner.clone_ref(py); + let result = Self(onceinit::OnceInit::new(cloned)); + + pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseCacheImpl)) + } + + #[inline] + fn __copy__(&self, py: pyo3::Python) -> pyo3::PyResult> { + self.copy(py) + } + + fn __repr__(slf: pyo3::PyRef<'_, Self>, py: pyo3::Python) -> String { + let inner = slf.0.get(); + let shared = inner.shared(); + let policy = inner.policy(); + + let iter = policy.vecdeque().iter().map(|handle| { + ( + // Without using `.bind` it returns something like `Py(addr)` + handle.key().as_ref().bind(py), + handle.value().bind(py), + ) + }); + + let items = utils::items_to_str(iter, policy.table().len()).unwrap(); + format!( + "{}[{}/{}]({})", + unsafe { utils::get_type_name(py, slf.as_ptr()) }, + policy.current_size(), + shared.maxsize(), + items + ) + } + + #[pyo3(signature = (n=0))] + fn first(&self, py: pyo3::Python, mut n: pyo3::ffi::Py_ssize_t) -> Option { + let inner = self.0.get(); + let policy = inner.policy(); + + if n < 0 { + n = (policy.vecdeque().len() as isize) + n; + } + if n < 0 { 
+ return None; + } + + let handle = policy.vecdeque().get(n as usize)?; + Some(handle.key().as_ref().clone_ref(py)) + } + + fn last(&self, py: pyo3::Python) -> Option { + let inner = self.0.get(); + let policy = inner.policy(); + let handle = policy.vecdeque().back()?; + Some(handle.key().as_ref().clone_ref(py)) + } + + fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { + let inner = self.0.get(); + let policy = inner.policy(); + + for handle in policy.vecdeque().iter() { + visit.call(handle.key().as_ref())?; + visit.call(handle.value())?; + } + Ok(()) + } + + fn __clear__(&self) { + let inner = self.0.get(); + let mut policy = inner.policy(); + policy.clear(inner.shared()); + } +} diff --git a/src/pyclasses/mod.rs b/src/pyclasses/mod.rs index 92c1625..1072b2d 100644 --- a/src/pyclasses/mod.rs +++ b/src/pyclasses/mod.rs @@ -1,2 +1,3 @@ pub mod base; pub mod cache; +pub mod fifocache; diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..4c8a3a2 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,9 @@ +from hypothesis import HealthCheck, settings + +# Register a custom profile that suppresses the health check +settings.register_profile( + "global_fuzz_settings", suppress_health_check=[HealthCheck.differing_executors] +) + +# Load the profile globally for the entire test run +settings.load_profile("global_fuzz_settings") diff --git a/tests/fifocache.py b/tests/fifocache.py new file mode 100644 index 0000000..48ce9f5 --- /dev/null +++ b/tests/fifocache.py @@ -0,0 +1,290 @@ +import typing + +import pytest + +import cachebox + +from . 
import mixins + + +class TestFIFOCache( + mixins.InitializeMixin, + mixins.InsertAndGetMixin, + mixins.SetDefaultMixin, + mixins.PopAndDeleteMixin, + mixins.UpdateMixin, + mixins.IntrospectionMixin, + # mixins.IterationMixin, + mixins.DrainClearShrinkMixin, + mixins.CopyMixin, + mixins.GetSizeOfMixin, + mixins.EdgeCasesMixin, + mixins.IssuesMixin, + mixins.FuzzyMixin, +): + def create_cache( + self, + maxsize: int = 10, + iterable: typing.Any = None, + capacity: int = 0, + getsizeof: typing.Any = None, + ) -> cachebox.BaseCacheImpl: + return cachebox.FIFOCache( + maxsize, + iterable, + capacity=capacity, + getsizeof=getsizeof, + ) + + +class TestFIFOCachePolicy(mixins.BaseMixin): + def create_cache( + self, + maxsize: int = 10, + iterable: typing.Any = None, + capacity: int = 0, + getsizeof: typing.Any = None, + ) -> cachebox.FIFOCache: + return cachebox.FIFOCache( + maxsize, + iterable, + capacity=capacity, + getsizeof=getsizeof, + ) + + def test_oldest_item_evicted_on_overflow(self): + """When capacity is exceeded, the first inserted key must be evicted.""" + cache = self.create_cache(3, [(1, "a"), (2, "b"), (3, "c")]) + cache[4] = "d" # triggers eviction of key 1 + assert 1 not in cache + assert 4 in cache + + def test_eviction_is_strictly_insertion_ordered(self): + """Keys evict in the exact order they were inserted, not access order.""" + cache = self.create_cache(3, [(1, "a"), (2, "b"), (3, "c")]) + + cache[4] = "d" # evicts 1 + cache[5] = "e" # evicts 2 + cache[6] = "f" # evicts 3 + + assert 1 not in cache + assert 2 not in cache + assert 3 not in cache + assert {4, 5, 6} == set(cache.keys()) + + def test_accessing_key_does_not_reset_eviction_priority(self): + """ + Unlike LRU, a cache hit must NOT push the key to the back. + Key 1 is accessed repeatedly but must still be the first evicted. 
+ """ + cache = self.create_cache(3, [(1, "a"), (2, "b"), (3, "c")]) + + _ = cache[1] + _ = cache[1] + _ = cache[1] + + cache[4] = "d" # must still evict key 1 + assert 1 not in cache + + def test_overwriting_existing_key_does_not_change_eviction_order(self): + """ + Updating the value of an existing key must NOT change its insertion + position in the eviction queue. + """ + cache = self.create_cache(3, [(1, "a"), (2, "b"), (3, "c")]) + + cache[1] = "updated" # update, not a new insertion + cache[4] = "d" # must still evict key 1 + + assert 1 not in cache + assert cache[4] == "d" + + def test_popitem_removes_oldest(self): + """popitem() must always remove and return the oldest inserted entry.""" + cache = self.create_cache(3, [(10, "x"), (20, "y"), (30, "z")]) + key, value = cache.popitem() + assert key == 10 + assert value == "x" + + def test_popitem_successive_calls_follow_fifo(self): + """Successive popitem() calls must yield keys in insertion order.""" + insertion_order = [(1, "a"), (2, "b"), (3, "c"), (4, "d")] + cache = self.create_cache(4, insertion_order) + popped_keys = [cache.popitem()[0] for _ in range(4)] + assert popped_keys == [1, 2, 3, 4] + + def test_drain_removes_n_oldest(self): + """drain(n) must remove exactly n items, oldest-first.""" + cache = self.create_cache(5, [(i, str(i)) for i in range(1, 6)]) + removed = cache.drain(3) + assert removed == 3 + assert 1 not in cache + assert 2 not in cache + assert 3 not in cache + assert 4 in cache + assert 5 in cache + + def test_first_returns_oldest_key(self): + cache = self.create_cache(3, [(7, "a"), (8, "b"), (9, "c")]) + assert cache.first() == 7 + + def test_last_returns_newest_key(self): + cache = self.create_cache(3, [(7, "a"), (8, "b"), (9, "c")]) + assert cache.last() == 9 + + def test_first_with_positive_n_browses_in_insertion_order(self): + """first(n) must walk forward through insertion order.""" + cache = self.create_cache(4, [(10, "a"), (20, "b"), (30, "c"), (40, "d")]) + assert 
cache.first(0) == 10 + assert cache.first(1) == 20 + assert cache.first(2) == 30 + assert cache.first(3) == 40 + + def test_first_with_negative_n_browses_from_end(self): + """first(-1) is an alias for last(); first(-2) is the second newest.""" + cache = self.create_cache(4, [(10, "a"), (20, "b"), (30, "c"), (40, "d")]) + assert cache.first(-1) == 40 + assert cache.first(-2) == 30 + + def test_first_after_eviction_reflects_new_head(self): + """After an eviction, first() must return the new oldest key.""" + cache = self.create_cache(3, [(1, "a"), (2, "b"), (3, "c")]) + cache[4] = "d" # evicts key 1 + assert cache.first() == 2 + + def test_last_after_insertion_reflects_new_tail(self): + cache = self.create_cache(3, [(1, "a"), (2, "b"), (3, "c")]) + cache[4] = "d" + assert cache.last() == 4 + + def test_first_on_single_element_cache(self): + cache = self.create_cache(1, [(42, "only")]) + assert cache.first() == 42 + assert cache.last() == 42 + + def test_first_returns_none_on_empty_cache(self): + cache = self.create_cache(0) + assert cache.first() is None + + def test_rolling_window_maintains_correct_contents(self): + """ + Simulate a sliding-window workload: insert N items into a cache of + size K and verify that only the most-recently inserted K items survive. + """ + maxsize = 4 + total = 20 + cache = self.create_cache(maxsize) + + for i in range(total): + cache[i] = i * 10 + + expected = set(range(total - maxsize, total)) + assert set(cache.keys()) == expected + + def test_no_phantom_keys_after_eviction(self): + """Evicted keys must not linger in contains() or iteration.""" + cache = self.create_cache(2, [(1, "a"), (2, "b")]) + cache[3] = "c" # evicts 1 + + for key in cache: + assert key != 1 + + assert not cache.contains(1) + + def test_reinsert_evicted_key_rejoins_at_tail(self): + """ + Re-inserting a previously evicted key must treat it as a brand-new + entry positioned at the back of the queue. 
+ """ + cache = self.create_cache(3, [(1, "a"), (2, "b"), (3, "c")]) + cache[4] = "d" # evicts 1 + cache[1] = "re" # re-insert 1 — should now be at the tail + cache[5] = "e" # must evict 2 (now the oldest), not 1 + + assert 2 not in cache + assert 1 in cache + assert cache[1] == "re" + + def test_is_full_triggers_at_maxsize(self): + cache = self.create_cache(3, [(1, "a"), (2, "b"), (3, "c")]) + assert cache.is_full() + cache[4] = "d" # eviction should keep it full, not overflow + assert cache.is_full() + assert len(cache) == 3 + + def test_len_never_exceeds_maxsize(self): + cache = self.create_cache(5) + for i in range(100): + cache[i] = i + assert len(cache) <= 5 + + def test_clear_resets_fifo_order(self): + """After clear(), the insertion order restarts from scratch.""" + cache = self.create_cache(3, [(1, "a"), (2, "b"), (3, "c")]) + cache.clear() + cache[10] = "x" + cache[20] = "y" + cache[30] = "z" + assert cache.first() == 10 + assert cache.last() == 30 + + @pytest.mark.skipif( + not hasattr(cachebox, "_fifocache_small_offset"), + reason="requires fifocache-small-offset feature flag", + ) + def test_edge_case_of_front_offset_overflow(self): + """ + Verifies that FIFOCache correctly rebases its internal `front_offset` + counter when it approaches `u8::MAX` (255 in the small-offset test build). 
+ """ + U8_MAX = 255 + CACHE_SIZE = 10 + + cache = self.create_cache(CACHE_SIZE) + + # drive front_offset to the rebase boundary + total_insertions = U8_MAX + CACHE_SIZE # 265 + for i in range(total_insertions): + cache.insert(i, i * 10) + + # Snapshot what *should* be alive: the last CACHE_SIZE keys inserted + expected_keys = set(range(total_insertions - CACHE_SIZE, total_insertions)) + + # verify the cache is structurally sound after the rebase + assert len(cache) == CACHE_SIZE + assert cache.is_full() + + # Exact contents — no phantom or missing keys + # TODO: uncomment + # assert set(cache.keys()) == expected_keys + + # FIFO ordering must be intact + assert cache.first() == min(expected_keys) + assert cache.last() == max(expected_keys) + + # All surviving values are correct + for key in expected_keys: + assert cache[key] == key * 10 + + # All evicted keys are truly gone + for evicted in range(total_insertions - CACHE_SIZE): + assert evicted not in cache + + # Prove the cache keeps working normally after the rebase + + # New insertions must evict the oldest surviving key (min of expected_keys) + next_key = total_insertions # 265 + oldest_before = cache.first() + cache.insert(next_key, next_key * 10) + + assert oldest_before not in cache # oldest was evicted + assert cache[next_key] == next_key * 10 # new entry is present + assert cache.last() == next_key # sits at the tail + assert len(cache) == CACHE_SIZE # size is unchanged + + # Ordering of the remainder is still correct + assert cache.first() == min(expected_keys) + 1 + + # popitem() must still yield the oldest entry + oldest_key, oldest_val = cache.popitem() + assert oldest_val == oldest_key * 10 diff --git a/tests/mixins.py b/tests/mixins.py index e6b4033..56384f6 100644 --- a/tests/mixins.py +++ b/tests/mixins.py @@ -12,6 +12,8 @@ hashable_keys = st.one_of( st.text(), st.integers(), + st.floats(allow_nan=False), + st.decimals(allow_nan=False), st.tuples(st.integers(), st.integers()), ) @@ -226,6 +228,8 @@ 
def test_update_overwrites_existing(self): cache.update({"a": 99}) assert cache.get("a") == 99 + # TODO: test invalid arguments + class IntrospectionMixin(BaseMixin): def test_len_reflects_insertions(self): @@ -242,13 +246,13 @@ def test_current_size_equals_len_without_getsizeof(self): cache.insert("a", 1) cache.insert("b", 2) - assert cache.current_size == len(cache) + assert cache.current_size() == len(cache) def test_remaining_size(self): cache = self.create_cache() cache.insert("a", 1) - assert cache.remaining_size == cache.maxsize - cache.current_size + assert cache.remaining_size() == cache.maxsize - cache.current_size() def test_is_empty_on_new_cache(self): cache = self.create_cache() @@ -331,6 +335,8 @@ def test_iter_yields_keys(self): cache.update({"x": 10, "y": 20}) assert set(iter(cache)) == {"x", "y"} + # TODO: test generation version + class DrainClearShrinkMixin(BaseMixin): def test_clear_removes_all_items(self): @@ -340,7 +346,7 @@ def test_clear_removes_all_items(self): cache.clear() assert len(cache) == 0 assert cache.is_empty() - assert cache.current_size == 0 + assert cache.current_size() == 0 def test_clear_with_reuse(self): cache = self.create_cache() @@ -399,7 +405,7 @@ def test_current_size_uses_getsizeof(self): c = self.create_cache(maxsize=10, getsizeof=sizer) c.insert("a", [1, 2, 3]) # size 3 c.insert("b", [1]) # size 1 - assert c.current_size == 4 + assert c.current_size() == 4 def test_overflow_based_on_weighted_size(self): # maxsize=5; each entry costs its value @@ -490,28 +496,31 @@ def test_issue_5(self): cache.get(EQ(val=i)) +# TODO: test rare usages, such as "same hash but not-equal", "unhashable keys" + + class FuzzyMixin(BaseMixin): @given(key=hashable_keys, value=any_value) - def test_insert_then_get_returns_same_value(self, key, value): + def test_fuzzy_insert_then_get_returns_same_value(self, key, value): c = self.create_cache(maxsize=0) c.insert(key, value) assert c.get(key) == value @given(key=hashable_keys, value=any_value) 
- def test_insert_new_key_returns_none(self, key, value): + def test_fuzzy_insert_new_key_returns_none(self, key, value): c = self.create_cache(maxsize=0) result = c.insert(key, value) assert result is None @given(key=hashable_keys, v1=any_value, v2=any_value) - def test_insert_existing_key_returns_old_value(self, key, v1, v2): + def test_fuzzy_insert_existing_key_returns_old_value(self, key, v1, v2): c = self.create_cache(maxsize=0) c.insert(key, v1) old = c.insert(key, v2) assert old == v1 @given(pairs=st.lists(st.tuples(hashable_keys, any_value), max_size=20)) - def test_len_never_exceeds_unique_keys(self, pairs): + def test_fuzzy_len_never_exceeds_unique_keys(self, pairs): c = self.create_cache(maxsize=0) expected = {} for k, v in pairs: @@ -520,14 +529,14 @@ def test_len_never_exceeds_unique_keys(self, pairs): assert len(c) == len(expected) @given(key=hashable_keys, value=any_value) - def test_len_increases_by_one_on_new_key(self, key, value): + def test_fuzzy_len_increases_by_one_on_new_key(self, key, value): c = self.create_cache(maxsize=0) before = len(c) c.insert(key, value) assert len(c) == before + 1 @given(key=hashable_keys, v1=any_value, v2=any_value) - def test_len_unchanged_on_overwrite(self, key, v1, v2): + def test_fuzzy_len_unchanged_on_overwrite(self, key, v1, v2): c = self.create_cache(maxsize=0) c.insert(key, v1) before = len(c) @@ -535,27 +544,27 @@ def test_len_unchanged_on_overwrite(self, key, v1, v2): assert len(c) == before @given(key=hashable_keys, value=any_value) - def test_contains_true_after_insert(self, key, value): + def test_fuzzy_contains_true_after_insert(self, key, value): c = self.create_cache(maxsize=0) c.insert(key, value) assert key in c assert c.contains(key) @given(key=hashable_keys, value=any_value) - def test_contains_false_after_delete(self, key, value): + def test_fuzzy_contains_false_after_delete(self, key, value): c = self.create_cache(maxsize=0) c.insert(key, value) del c[key] assert key not in c 
@given(key=hashable_keys, value=any_value) - def test_pop_returns_inserted_value(self, key, value): + def test_fuzzy_pop_returns_inserted_value(self, key, value): c = self.create_cache(maxsize=0) c.insert(key, value) assert c.pop(key) == value @given(key=hashable_keys, value=any_value) - def test_pop_removes_key(self, key, value): + def test_fuzzy_pop_removes_key(self, key, value): c = self.create_cache(maxsize=0) c.insert(key, value) c.pop(key) @@ -565,16 +574,16 @@ def test_pop_removes_key(self, key, value): maxsize=st.integers(min_value=1, max_value=50), pairs=st.lists(st.tuples(hashable_keys, any_value), max_size=50), ) - def test_current_size_plus_remaining_equals_maxsize(self, maxsize, pairs): + def test_fuzzy_current_size_plus_remaining_equals_maxsize(self, maxsize, pairs): c = self.create_cache(maxsize=maxsize) for k, v in pairs: if c.is_full(): break c.insert(k, v) - assert c.current_size + c.remaining_size == maxsize + assert c.current_size() + c.remaining_size() == maxsize @given(pairs=st.lists(st.tuples(hashable_keys, any_value), max_size=20)) - def test_clear_always_leaves_cache_empty(self, pairs): + def test_fuzzy_clear_always_leaves_cache_empty(self, pairs): c = self.create_cache(maxsize=0) for k, v in pairs: c.insert(k, v) @@ -583,7 +592,7 @@ def test_clear_always_leaves_cache_empty(self, pairs): assert c.is_empty() @given(pairs=st.lists(st.tuples(hashable_keys, any_value), max_size=20)) - def test_keys_values_items_are_consistent(self, pairs): + def test_fuzzy_keys_values_items_are_consistent(self, pairs): c = self.create_cache(maxsize=0) truth = {} for k, v in pairs: @@ -598,20 +607,20 @@ def test_keys_values_items_are_consistent(self, pairs): ) @given(key=hashable_keys, existing=any_value, default=any_value) - def test_setdefault_never_overwrites_existing(self, key, existing, default): + def test_fuzzy_setdefault_never_overwrites_existing(self, key, existing, default): c = self.create_cache(maxsize=0) c.insert(key, existing) c.setdefault(key, 
default) assert c.get(key) == existing @given(key=hashable_keys, default=any_value) - def test_setdefault_inserts_when_missing(self, key, default): + def test_fuzzy_setdefault_inserts_when_missing(self, key, default): c = self.create_cache(maxsize=0) c.setdefault(key, default) assert c.get(key) == default @given(pairs=st.lists(st.tuples(hashable_keys, any_value), max_size=20)) - def test_copy_equals_original(self, pairs): + def test_fuzzy_copy_equals_original(self, pairs): c = self.create_cache(maxsize=0) for k, v in pairs: c.insert(k, v) @@ -620,7 +629,9 @@ def test_copy_equals_original(self, pairs): @given( key=hashable_keys, value=any_value, new_key=hashable_keys, new_value=any_value ) - def test_copy_is_independent_of_original(self, key, value, new_key, new_value): + def test_fuzzy_copy_is_independent_of_original( + self, key, value, new_key, new_value + ): assume(new_key != key) c = self.create_cache(maxsize=0) c.insert(key, value) From caa643a5020acdb0a566edaa70791acb88360547 Mon Sep 17 00:00:00 2001 From: awolverp Date: Thu, 21 May 2026 19:53:51 +0330 Subject: [PATCH 12/60] Refactor FIFOCache --- cachebox/_core.pyi | 30 ++++++++ src/policies/fifopolicy.rs | 85 ++++++++++++++++++++ src/pyclasses/fifocache.rs | 154 ++++++++++++++++++++++++++----------- tests/fifocache.py | 5 +- tests/mixins.py | 60 ++++++++++----- 5 files changed, 265 insertions(+), 69 deletions(-) diff --git a/cachebox/_core.pyi b/cachebox/_core.pyi index 1905ecb..38d295f 100644 --- a/cachebox/_core.pyi +++ b/cachebox/_core.pyi @@ -357,6 +357,36 @@ class FIFOCache(BaseCacheImpl[KT, VT]): """ ... + def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: + """ + Returns an iterable object of the cache's items (key-value pairs). + + Notes: + - You should not make any changes in cache while using this iterable object. + - Items are ordered. + """ + ... + + def keys(self) -> typing.Iterable[KT]: + """ + Returns an iterable object of the cache's keys. 
+ + Notes: + - You should not make any changes in cache while using this iterable object. + - Keys are ordered. + """ + ... + + def values(self) -> typing.Iterable[VT]: + """ + Returns an iterable object of the cache's values. + + Notes: + - You should not make any changes in cache while using this iterable object. + - Values are ordered. + """ + ... + def first(self, n: int = 0) -> typing.Optional[KT]: """ Returns the first key in cache; this is the one which will be removed by `popitem()` (if n == 0). diff --git a/src/policies/fifopolicy.rs b/src/policies/fifopolicy.rs index 404184f..e6f94f1 100644 --- a/src/policies/fifopolicy.rs +++ b/src/policies/fifopolicy.rs @@ -110,6 +110,85 @@ impl traits::VacantExt for Vacant<'_> { } } +/// Immutable slice iterator without lifetime +/// +/// # Safety +/// - You should be sure about lifetimes, and pointers should be alive while this type is alive. +/// Any changes to pointers can cause *Undefined Behaviour*. +/// - It doesn't support `ZST`s. +struct RawSliceIter { + pointer: std::ptr::NonNull, + index: usize, + len: usize, +} + +impl RawSliceIter { + /// Creates a new [`RawSliceIter`] + #[inline] + fn new(slice: &[T]) -> Self { + let pointer: std::ptr::NonNull = std::ptr::NonNull::from(slice).cast(); + + Self { + pointer, + index: 0, + len: slice.len(), + } + } +} + +impl Iterator for RawSliceIter { + type Item = std::ptr::NonNull; + + #[inline] + fn next(&mut self) -> Option { + if self.index >= self.len { + None + } else { + let value = unsafe { self.pointer.add(self.index) }; + self.index += 1; + Some(value) + } + } +} + +unsafe impl Send for RawSliceIter {} +unsafe impl Sync for RawSliceIter {} + +/// Raw iterator for [`VecDeque`] which doesn't have lifetime. +/// +/// # Safety +/// You should track changes of [`VecDeque`] yourself. 
+pub struct RawVecDequeIter { + first: RawSliceIter, + second: RawSliceIter, +} + +impl RawVecDequeIter { + /// Creates a new [`RawVecDequeIter`] + #[inline] + fn new(first: &[T], second: &[T]) -> Self { + Self { + first: RawSliceIter::new(first), + second: RawSliceIter::new(second), + } + } +} + +impl Iterator for RawVecDequeIter { + type Item = std::ptr::NonNull; + + #[inline] + fn next(&mut self) -> Option { + match self.first.next() { + Some(val) => Some(val), + None => { + std::mem::swap(&mut self.first, &mut self.second); + self.first.next() + } + } + } +} + pub struct FIFOPolicy { /// Maps each key to its logical index into [`FIFOPolicy::entries`], enabling O(1) lookups. /// @@ -226,6 +305,12 @@ impl FIFOPolicy { // Both branches now store raw VecDeque indices, so the offset is zero. self.front_offset = 0; } + + #[inline] + pub unsafe fn iter(&self) -> RawVecDequeIter { + let (first, second) = self.entries.as_slices(); + RawVecDequeIter::new(first, second) + } } impl traits::PolicyExt for FIFOPolicy { diff --git a/src/pyclasses/fifocache.rs b/src/pyclasses/fifocache.rs index b615acd..33777b7 100644 --- a/src/pyclasses/fifocache.rs +++ b/src/pyclasses/fifocache.rs @@ -490,52 +490,52 @@ impl PyFIFOCache { .map(|x| !x) } - // fn items(&self, py: pyo3::Python) -> pyo3::PyResult> { - // let inner = self.0.get(); - // let gv = inner.shared().generation_version().clone(); - // let initial_gv = gv.get(); - - // // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] - // let result = PyCacheItems { - // iter: parking_lot::Mutex::new(unsafe { inner.policy().table().iter() }), - // gv, - // initial_gv, - // }; - // pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) - // } - - // fn values(&self, py: pyo3::Python) -> pyo3::PyResult> { - // let inner = self.0.get(); - // let gv = inner.shared().generation_version().clone(); - // let initial_gv = gv.get(); - - // // SAFETY: We cannot use lifetimes here, but 
we're tracking changes using [`GenerationVersion`] - // let result = PyCacheValues { - // iter: parking_lot::Mutex::new(unsafe { inner.policy().table().iter() }), - // gv, - // initial_gv, - // }; - // pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) - // } - - // fn keys(&self, py: pyo3::Python) -> pyo3::PyResult> { - // let inner = self.0.get(); - // let gv = inner.shared().generation_version().clone(); - // let initial_gv = gv.get(); - - // // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] - // let result = PyCacheKeys { - // iter: parking_lot::Mutex::new(unsafe { inner.policy().table().iter() }), - // gv, - // initial_gv, - // }; - // pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) - // } - - // #[inline] - // fn __iter__(&self, py: pyo3::Python) -> pyo3::PyResult> { - // self.keys(py) - // } + fn items(&self, py: pyo3::Python) -> pyo3::PyResult> { + let inner = self.0.get(); + let gv = inner.shared().generation_version().clone(); + let initial_gv = gv.get(); + + // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] + let result = PyFIFOCacheItems { + iter: parking_lot::Mutex::new(unsafe { inner.policy().iter() }), + gv, + initial_gv, + }; + pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + } + + fn values(&self, py: pyo3::Python) -> pyo3::PyResult> { + let inner = self.0.get(); + let gv = inner.shared().generation_version().clone(); + let initial_gv = gv.get(); + + // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] + let result = PyFIFOCacheValues { + iter: parking_lot::Mutex::new(unsafe { inner.policy().iter() }), + gv, + initial_gv, + }; + pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + } + + fn keys(&self, py: pyo3::Python) -> pyo3::PyResult> { + let inner = self.0.get(); + let gv = inner.shared().generation_version().clone(); + 
let initial_gv = gv.get(); + + // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] + let result = PyFIFOCacheKeys { + iter: parking_lot::Mutex::new(unsafe { inner.policy().iter() }), + gv, + initial_gv, + }; + pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + } + + #[inline] + fn __iter__(&self, py: pyo3::Python) -> pyo3::PyResult> { + self.keys(py) + } fn copy(&self, py: pyo3::Python) -> pyo3::PyResult> { let inner = self.0.get(); @@ -613,3 +613,65 @@ impl PyFIFOCache { policy.clear(inner.shared()); } } + +// Implement iterators +macro_rules! implement_iterator { + ( + $( + $name:ident as $pyname:literal + fn ($py:ident, $handle:ident) -> $rt_type:ty { $init:expr } + )+ + ) => { + $( + implement_pyclass! { + [extends=crate::pyclasses::base::PyBaseIteratorImpl, generic, frozen] + $name as $pyname { + initial_gv: u32, + gv: utils::GenerationVersion, + iter: parking_lot::Mutex>, + } + } + + #[pyo3::pymethods] + impl $name { + #[inline] + fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { + slf + } + + fn __next__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult<$rt_type> { + if slf.initial_gv != slf.gv.get() { + return Err(new_py_error!( + PyRuntimeError, + "cache size changed during iteration" + )); + } + + let mut iter = slf.iter.lock(); + + match iter.next() { + Some(x) => { + let $py = slf.py(); + let $handle = unsafe { x.as_ref() }; + Ok($init) + } + None => return Err(new_py_error!(PyStopIteration, ())), + } + } + } + )+ + }; +} +implement_iterator!( + PyFIFOCacheItems as "fifocache_items" + fn(py, handle) -> (alias::PyObject, alias::PyObject) {{ + let (key, val) = handle.clone_ref(py).into_pair(); + (key.into(), val) + }} + + PyFIFOCacheKeys as "fifocache_keys" + fn(py, handle) -> alias::PyObject { handle.key().clone_ref(py).into() } + + PyFIFOCacheValues as "fifocache_values" + fn(py, handle) -> alias::PyObject { handle.value().clone_ref(py) } +); diff --git a/tests/fifocache.py 
b/tests/fifocache.py index 48ce9f5..8edd5a6 100644 --- a/tests/fifocache.py +++ b/tests/fifocache.py @@ -14,7 +14,7 @@ class TestFIFOCache( mixins.PopAndDeleteMixin, mixins.UpdateMixin, mixins.IntrospectionMixin, - # mixins.IterationMixin, + mixins.IterationMixin, mixins.DrainClearShrinkMixin, mixins.CopyMixin, mixins.GetSizeOfMixin, @@ -255,8 +255,7 @@ def test_edge_case_of_front_offset_overflow(self): assert cache.is_full() # Exact contents — no phantom or missing keys - # TODO: uncomment - # assert set(cache.keys()) == expected_keys + assert set(cache.keys()) == expected_keys # FIFO ordering must be intact assert cache.first() == min(expected_keys) diff --git a/tests/mixins.py b/tests/mixins.py index 56384f6..c93aadb 100644 --- a/tests/mixins.py +++ b/tests/mixins.py @@ -228,7 +228,21 @@ def test_update_overwrites_existing(self): cache.update({"a": 99}) assert cache.get("a") == 99 - # TODO: test invalid arguments + def test_update_invalid_argument(self): + cache = self.create_cache() + + with pytest.raises(TypeError): + cache.update("abc") # type: ignore + + with pytest.raises(TypeError): + cache.update({1, 2, 3}) # type: ignore + + class _invalid_items: + def items(self): + return [1, 2, 3] + + with pytest.raises(TypeError): + cache.update(_invalid_items()) # type: ignore class IntrospectionMixin(BaseMixin): @@ -456,32 +470,41 @@ def test_multiple_types_as_values(self): assert cache.get("list") == [1, 2] assert cache.get("dict") == {"a": 1} + def test_bad_hash_key(self): -@dataclasses.dataclass -class EQ: - def __init__(self, val: int) -> None: - self.val = val - - def __eq__(self, other: "EQ") -> bool: # type: ignore - return self.val == other.val + @dataclasses.dataclass + class BadHash: + val: int - def __hash__(self) -> int: - return self.val + def __hash__(self) -> int: + return 1 + size = 1000 + cache = self.create_cache(size, capacity=size) -@dataclasses.dataclass -class NoEQ: - def __init__(self, val: int) -> None: - self.val = val - - def 
__hash__(self) -> int: - return self.val + for i in range(size): + cache.insert(BadHash(val=i), i) + cache.get(BadHash(val=i)) class IssuesMixin(BaseMixin): def test_issue_5(self): # https://github.com/awolverp/cachebox/issues/5 + @dataclasses.dataclass + class EQ: + val: int + + def __hash__(self) -> int: + return self.val + + @dataclasses.dataclass + class NoEQ: + val: int + + def __hash__(self) -> int: + return self.val + size = 1000 cache = self.create_cache(size, capacity=size) @@ -496,9 +519,6 @@ def test_issue_5(self): cache.get(EQ(val=i)) -# TODO: test rare usages, such as "same hash but not-equal", "unhashable keys" - - class FuzzyMixin(BaseMixin): @given(key=hashable_keys, value=any_value) def test_fuzzy_insert_then_get_returns_same_value(self, key, value): From eb67c66ff02cc0d58562c025f40ae505e4279ed2 Mon Sep 17 00:00:00 2001 From: awolverp Date: Fri, 22 May 2026 12:10:15 +0330 Subject: [PATCH 13/60] Refactor RRCache --- Cargo.lock | 7 + Cargo.toml | 1 + cachebox/__init__.py | 1 + cachebox/_core.pyi | 86 ++++ src/lib.rs | 15 + src/policies/mod.rs | 1 + src/policies/rrpolicy.rs | 284 ++++++++++++ src/pyclasses/mod.rs | 1 + src/pyclasses/rrcache.rs | 638 ++++++++++++++++++++++++++ tests/cache.py | 78 ---- tests/mixins.py | 23 + tests/{fifocache.py => test_impls.py} | 118 +++++ 12 files changed, 1175 insertions(+), 78 deletions(-) create mode 100644 src/policies/rrpolicy.rs create mode 100644 src/pyclasses/rrcache.rs delete mode 100644 tests/cache.py rename tests/{fifocache.py => test_impls.py} (76%) diff --git a/Cargo.lock b/Cargo.lock index 721fdf3..7832ec4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13,6 +13,7 @@ name = "cachebox" version = "6.0.0" dependencies = [ "cfg-if", + "fastrand", "parking_lot", "pyo3", "pyo3-build-config", @@ -34,6 +35,12 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" +[[package]] +name = "fastrand" +version = 
"2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" + [[package]] name = "find-msvc-tools" version = "0.1.9" diff --git a/Cargo.toml b/Cargo.toml index 484fd95..3c99da4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,6 +23,7 @@ strip = true [dependencies] cfg-if = "1.0.4" +fastrand = "2.4.1" parking_lot = {version="0.12.5", default-features=false} pyo3 = {version="0.28.3", default-features=false, features=["macros", "generate-import-lib"]} # tokio = {version="1.52.3", default-features=false, features=["sync"]} diff --git a/cachebox/__init__.py b/cachebox/__init__.py index 52875e7..2fa87b4 100644 --- a/cachebox/__init__.py +++ b/cachebox/__init__.py @@ -1,6 +1,7 @@ from ._core import BaseCacheImpl as BaseCacheImpl from ._core import Cache as Cache from ._core import FIFOCache as FIFOCache +from ._core import RRCache as RRCache try: from ._core import ( diff --git a/cachebox/_core.pyi b/cachebox/_core.pyi index 38d295f..e7726d8 100644 --- a/cachebox/_core.pyi +++ b/cachebox/_core.pyi @@ -400,3 +400,89 @@ class FIFOCache(BaseCacheImpl[KT, VT]): Returns the last key in cache. Equals to `self.first(-1)`. """ ... + +class RRCache(BaseCacheImpl[KT, VT]): + def insert(self, key: KT, value: VT) -> typing.Optional[VT]: + """ + Equals to `self[key] = value`, but returns a value: + + - If the cache did not have this key present, None is returned. + - If the cache did have this key present, the value is updated, + and the old value is returned. The key is not updated, though; + + It's recommended to use this method instead of `self[key] = value`, as it keeps code + compatible across different cache policies. + """ + ... + + def update(self, iterable: _IterableType[KT, VT]) -> None: + """ + Updates the cache with elements from a dictionary or an iterable object of key/value pairs. + """ + ... 
+ + def get( + self, + key: KT, + default: typing.Optional[DT] = ..., + ) -> typing.Union[VT, DT]: + """ + Retrieves the value for a given key from the cache. + + Returns the value associated with the key if present, otherwise returns the specified default value. + Equivalent to `self[key]`, but provides a fallback default if the key is not found. + """ + ... + + def setdefault( + self, + key: KT, + default: typing.Optional[DT] = None, + ) -> typing.Optional[VT | DT]: + """ + Inserts key with a value of default if key is not in the cache. + + Returns the value for key if key is in the cache, else default. + """ + ... + + def popitem(self) -> typing.Tuple[KT, VT]: + """Randomly selects and removes a (key, value) pair from the cache.""" + ... + + def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: + """ + Returns an iterable object of the cache's items (key-value pairs). + + Notes: + - You should not make any changes in cache while using this iterable object. + - Items are not ordered. + """ + ... + + def keys(self) -> typing.Iterable[KT]: + """ + Returns an iterable object of the cache's keys. + + Notes: + - You should not make any changes in cache while using this iterable object. + - Keys are not ordered. + """ + ... + + def values(self) -> typing.Iterable[VT]: + """ + Returns an iterable object of the cache's values. + + Notes: + - You should not make any changes in cache while using this iterable object. + - Values are not ordered. + """ + ... + + def random_key(self) -> KT: + """ + Randomly selects and returns a key from the cache. + Raises `KeyError` If the cache is empty. + """ + ... 
diff --git a/src/lib.rs b/src/lib.rs index 5bf084f..4fc4896 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -40,6 +40,21 @@ mod _core { #[pymodule_export] use crate::pyclasses::fifocache::PyFIFOCache; + #[pymodule_export] + use crate::pyclasses::fifocache::PyFIFOCacheItems; + #[pymodule_export] + use crate::pyclasses::fifocache::PyFIFOCacheKeys; + #[pymodule_export] + use crate::pyclasses::fifocache::PyFIFOCacheValues; + + #[pymodule_export] + use crate::pyclasses::rrcache::PyRRCache; + #[pymodule_export] + use crate::pyclasses::rrcache::PyRRCacheItems; + #[pymodule_export] + use crate::pyclasses::rrcache::PyRRCacheKeys; + #[pymodule_export] + use crate::pyclasses::rrcache::PyRRCacheValues; #[pymodule_init] pub fn init(m: &pyo3::Bound<'_, pyo3::types::PyModule>) -> pyo3::PyResult<()> { diff --git a/src/policies/mod.rs b/src/policies/mod.rs index 2be38ae..b7361e9 100644 --- a/src/policies/mod.rs +++ b/src/policies/mod.rs @@ -3,5 +3,6 @@ pub mod traits; pub mod fifopolicy; pub mod nopolicy; +pub mod rrpolicy; pub mod wrapped; diff --git a/src/policies/rrpolicy.rs b/src/policies/rrpolicy.rs new file mode 100644 index 0000000..668fbb7 --- /dev/null +++ b/src/policies/rrpolicy.rs @@ -0,0 +1,284 @@ +use crate::hashbrown; +use crate::internal::utils; +use crate::policies::traits; +use crate::policies::traits::HandleExt; +use crate::policies::traits::PolicyExt; +use crate::policies::traits::SharedExt; + +pub use super::common::Handle; +pub use super::common::Shared; + +/// A view into an occupied entry in [`RRPolicy`]. +pub struct Occupied<'a> { + /// The parent storage that owns the hash table. + policy: &'a mut RRPolicy, + /// The shared configuration + shared: &'a Shared, + /// Raw bucket pointing to the occupied slot within the hash table. 
+ bucket: hashbrown::raw::Bucket, +} + +impl traits::EntryExt for Occupied<'_> { + type Shared = Shared; + type Handle = Handle; + + #[inline] + fn would_exceed(&self, extra_size: usize) -> bool { + let handle = unsafe { self.bucket.as_ref() }; + + self.policy + .currsize + .saturating_add(extra_size) + .saturating_sub(handle.size()) + > self.shared.maxsize() + } + + #[inline(always)] + fn evict(&mut self, py: pyo3::Python) -> pyo3::PyResult { + self.policy.evict(py, self.shared) + } +} + +impl traits::OccupiedExt for Occupied<'_> { + fn remove(self) -> Self::Handle { + self.shared.generation_version().increment(); + + let (h, _) = unsafe { self.policy.table.remove(self.bucket) }; + self.policy.currsize = self.policy.currsize.saturating_sub(h.size()); + h + } + + fn replace(self, new: Self::Handle) -> Self::Handle { + self.policy.currsize = self.policy.currsize.saturating_add(new.size()); + let old = unsafe { std::mem::replace(self.bucket.as_mut(), new) }; + self.policy.currsize = self.policy.currsize.saturating_sub(old.size()); + + old + } +} + +/// A view into a vacant slot in [`RRPolicy`]. +pub struct Vacant<'a> { + /// The parent policy that owns the hash table. + policy: &'a mut RRPolicy, + /// The shared configuration + shared: &'a Shared, + /// If true, means we used `.evict()` method, and empty slots are available + /// in table; so we don't need to reserve a new one. 
+ space_available: bool, +} + +impl traits::EntryExt for Vacant<'_> { + type Shared = Shared; + type Handle = Handle; + + #[inline] + fn would_exceed(&self, extra_size: usize) -> bool { + self.policy.currsize.saturating_add(extra_size) > self.shared.maxsize() + } + + #[inline(always)] + fn evict(&mut self, py: pyo3::Python) -> pyo3::PyResult { + self.policy.evict(py, self.shared) + } +} + +impl traits::VacantExt for Vacant<'_> { + fn insert(self, handle: Self::Handle) { + self.shared.generation_version().increment(); + self.policy.currsize = self.policy.currsize.saturating_add(handle.size()); + + if !self.space_available { + self.policy.table.reserve(1, |x| x.key().hash()); + } + unsafe { + self.policy + .table + .insert_no_grow(handle.key().hash(), handle); + } + } +} + +pub struct RRPolicy { + /// The raw hash table storing all live [`Handle`] entries. + table: hashbrown::raw::RawTable, + /// Running total of all stored handles' sizes, maintained incrementally. + currsize: usize, +} + +impl RRPolicy { + /// Creates a new [`RRPolicy`]. + /// + /// The underlying hash table is pre-allocated to hold at least `capacity` entries + /// without reallocation. + pub fn new(capacity: usize) -> Self { + Self { + table: hashbrown::raw::RawTable::with_capacity(capacity), + currsize: 0, + } + } + + /// Returns a reference to the underlying raw hash table. 
+ #[inline(always)] + pub fn table(&self) -> &hashbrown::raw::RawTable { + &self.table + } +} + +impl PolicyExt for RRPolicy { + type Shared = Shared; + type Handle = Handle; + + type Occupied<'a> + = Occupied<'a> + where + Self: 'a; + + type Vacant<'a> + = Vacant<'a> + where + Self: 'a; + + #[inline] + fn current_size(&self) -> usize { + self.currsize + } + + #[inline] + fn get( + &mut self, + py: pyo3::Python, + key: &::Key, + ) -> pyo3::PyResult> { + let bucket = self.table.find(key.hash(), |x| key.py_eq(py, x.key()))?; + Ok(bucket.map(|x| unsafe { x.as_ref() })) + } + + fn entry<'a>( + &'a mut self, + py: pyo3::Python, + key: &::Key, + shared: &'a Self::Shared, + ) -> pyo3::PyResult, Self::Vacant<'a>>> { + match self.table.find(key.hash(), |x| key.py_eq(py, x.key()))? { + Some(bucket) => { + let result = Occupied { + policy: self, + shared, + bucket, + }; + Ok(traits::PolicyEntry::Occupied(result)) + } + None => { + let result = Vacant { + policy: self, + shared, + space_available: false, + }; + Ok(traits::PolicyEntry::Vacant(result)) + } + } + } + + #[inline] + fn evict(&mut self, _py: pyo3::Python, shared: &Self::Shared) -> pyo3::PyResult { + if self.table.is_empty() { + Err(new_py_error!(PyKeyError, "cache is empty")) + } else { + let nth = fastrand::usize(0..self.table.len()); + + let bucket = unsafe { self.table.iter().nth(nth).unwrap_unchecked() }; + + shared.generation_version().increment(); + + let (handle, _) = unsafe { self.table.remove(bucket) }; + self.currsize = self.currsize.saturating_sub(handle.size()); + Ok(handle) + } + } + + #[inline] + fn shrink_to_fit(&mut self, shared: &Self::Shared) { + let initial = self.table.capacity(); + self.table.shrink_to(0, |x| x.key().hash()); + + if initial != self.table.capacity() { + shared.generation_version().increment(); + } + } + + #[inline] + fn clear(&mut self, shared: &Self::Shared) { + if self.table.is_empty() { + return; + } + self.table.clear(); + shared.generation_version().increment(); + 
self.currsize = 0; + } + + fn py_eq( + &self, + py: pyo3::Python, + shared: &Self::Shared, + other: &Self, + other_shared: &Self::Shared, + ) -> pyo3::PyResult { + if shared.maxsize() != other_shared.maxsize() || self.table.len() != other.table.len() { + return Ok(false); + } + + let mut error = None; + let result = unsafe { + let mut iterator = self.table.iter().map(|x| x.as_ref()); + + iterator.all(|handle_1| { + let result = other + .table + .get(handle_1.key().hash(), |x| handle_1.key().py_eq(py, x.key())); + + match result { + Err(e) => { + error = Some(e); + // Return false to break the `.all` loop + false + } + Ok(None) => false, + Ok(Some(handle_2)) => { + let value_1 = handle_1.value(); + let value_2 = handle_2.value(); + + match utils::pyobject_equal(py, value_1.as_ptr(), value_2.as_ptr()) { + Ok(result) => result, + Err(e) => { + error = Some(e); + // Return false to break the `.all` loop + false + } + } + } + } + }) + }; + + if let Some(error) = error { + return Err(error); + } + Ok(result) + } + + fn clone_ref(&self, py: pyo3::Python<'_>) -> Self { + let mut table = hashbrown::raw::RawTable::with_capacity(self.table.capacity()); + + unsafe { + for handle in self.table.iter().map(|x| x.as_ref()) { + table.insert_no_grow(handle.key().hash(), handle.clone_ref(py)); + } + } + + Self { + table, + currsize: self.currsize, + } + } +} diff --git a/src/pyclasses/mod.rs b/src/pyclasses/mod.rs index 1072b2d..b7e7792 100644 --- a/src/pyclasses/mod.rs +++ b/src/pyclasses/mod.rs @@ -1,3 +1,4 @@ pub mod base; pub mod cache; pub mod fifocache; +pub mod rrcache; diff --git a/src/pyclasses/rrcache.rs b/src/pyclasses/rrcache.rs new file mode 100644 index 0000000..5ce54a4 --- /dev/null +++ b/src/pyclasses/rrcache.rs @@ -0,0 +1,638 @@ +use crate::internal::alias; +use crate::internal::onceinit; +use crate::internal::utils; +use crate::policies::rrpolicy; +use crate::policies::traits::HandleExt; +use crate::policies::traits::PolicyExt; +use 
crate::policies::traits::SharedExt; +use crate::policies::wrapped::Wrapped; + +implement_pyclass! { + [subclass, extends=crate::pyclasses::base::PyBaseCacheImpl, generic, frozen] + PyRRCache as "RRCache" (onceinit::OnceInit>); +} + +#[pyo3::pymethods] +impl PyRRCache { + #[new] + #[allow(unused_variables)] + #[pyo3(signature=(*args, **kwds))] + fn __new__( + args: alias::ArgsType, + kwds: Option, + ) -> (Self, crate::pyclasses::base::PyBaseCacheImpl) { + ( + Self(onceinit::OnceInit::uninit()), + crate::pyclasses::base::PyBaseCacheImpl, + ) + } + + /// Initialize a new `RRCache` instance. + /// + /// Args: + /// maxsize: Maximum number of elements the cache can hold. Zero means unlimited. + /// iterable: Initial data to populate the cache. + /// capacity: Pre-allocate hash table capacity to minimize reallocations. Defaults to 0. + /// getsizeof: A callable that computes the size of a key-value pair. When `None`, each + /// entry is assumed to have a size of 1 (equivalent to `lambda k, v: 1`). + /// Use this to implement weighted caching — for example, sizing entries by + /// memory footprint or byte length. + /// + /// The cache can be pre-sized via `capacity` to reduce hash table reallocations when + /// the number of expected entries is known ahead of time. 
+ #[pyo3(signature=(maxsize, iterable=None, *, capacity=0, getsizeof=None))] + fn __init__( + &self, + py: pyo3::Python, + maxsize: usize, + iterable: Option, + capacity: usize, + getsizeof: Option, + ) -> pyo3::PyResult<()> { + let wrapped = Wrapped::new( + rrpolicy::RRPolicy::new(capacity), + rrpolicy::Shared::new(maxsize, getsizeof), + ); + + if let Some(iterable) = iterable { + let getsizeof = wrapped.shared().getsizeof().clone_ref(py); + + let result = wrapped.extend( + // iterable object + iterable, + // transform function + |key, value| rrpolicy::Handle::new(py, &getsizeof, key, value), + ); + self.0.set(wrapped); + result + } else { + self.0.set(wrapped); + Ok(()) + } + } + + #[getter] + #[inline] + fn maxsize(&self) -> usize { + let inner = self.0.get(); + inner.shared().maxsize() + } + + #[inline] + fn current_size(&self) -> usize { + let inner = self.0.get(); + inner.policy().current_size() + } + + #[inline] + fn remaining_size(&self) -> usize { + let inner = self.0.get(); + inner.remaining_size() + } + + #[getter] + #[inline] + fn getsizeof(&self, py: pyo3::Python) -> Option { + let inner = self.0.get(); + inner.shared().getsizeof().clone_ref(py).into() + } + + /// Returns the number of elements the map can hold without reallocating. + #[inline] + fn capacity(&self) -> usize { + let inner = self.0.get(); + let policy = inner.policy(); + + policy.table().capacity() + } + + /// Returns the number of entries currently in the cache. 
+ #[inline] + fn __len__(&self) -> usize { + let inner = self.0.get(); + let policy = inner.policy(); + + policy.table().len() + } + + #[inline] + fn __sizeof__(&self) -> usize { + let inner = self.0.get(); + let policy = inner.policy(); + + policy.table().capacity() * std::mem::size_of::() + } + + #[inline] + fn __bool__(&self) -> bool { + let inner = self.0.get(); + let policy = inner.policy(); + + !policy.table().is_empty() + } + + #[inline] + fn __contains__(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult { + self.contains(py, key) + } + + /// Returns `true` if the cache contains an entry for `key`. + #[inline] + fn contains(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + let inner = self.0.get(); + inner.contains(py, &key) + } + + /// Returns `True` if cache is empty. + #[inline] + fn is_empty(&self) -> bool { + let inner = self.0.get(); + let policy = inner.policy(); + + policy.table().is_empty() + } + + /// Returns `True` when the cumulative size has reached the maxsize limit. + #[inline] + fn is_full(&self) -> bool { + let inner = self.0.get(); + let shared = inner.shared(); + let policy = inner.policy(); + + policy.current_size() >= shared.maxsize() + } + + /// Equals to `self[key] = value`, but returns a value: + /// + /// - If the cache did not have this key present, None is returned. + /// - If the cache did have this key present, the value is updated, + /// and the old value is returned. The key is not updated, though. + /// + /// Note: if the cache has reached the maxsize limit, a randomly selected + /// entry is evicted to make room for the new one. 
+ fn insert( + &self, + py: pyo3::Python, + key: alias::PyObject, + value: alias::PyObject, + ) -> pyo3::PyResult> { + let inner = self.0.get(); + let handle = rrpolicy::Handle::new(py, inner.shared().getsizeof(), key, value)?; + + let old_handle = inner.insert(py, handle)?.map(|x| x.into_value()); + Ok(old_handle) + } + + /// Updates the cache with elements from a dictionary or an iterable object of key/value pairs. + fn update( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + iterable: alias::PyObject, + ) -> pyo3::PyResult<()> { + if std::ptr::eq(slf.as_ptr(), iterable.as_ptr()) { + return Ok(()); + } + + let inner = slf.0.get(); + let getsizeof = inner.shared().getsizeof().clone_ref(py); + + inner.extend( + // iterable object + iterable.into_bound(py), + // transform function + move |key, value| rrpolicy::Handle::new(py, &getsizeof, key, value), + ) + } + + #[inline] + fn __setitem__( + &self, + py: pyo3::Python, + key: alias::PyObject, + value: alias::PyObject, + ) -> pyo3::PyResult<()> { + self.insert(py, key, value)?; + Ok(()) + } + + /// Retrieves the value for a given key from the cache. + /// + /// Returns the value associated with the key if present, otherwise returns the specified default value. + /// Equivalent to `self[key]`, but provides a fallback default if the key is not found. + /// + /// Args: + /// key: The key to look up in the cache. + /// default: The value to return if the key is not present in the cache. Defaults to None. + /// + /// Returns: + /// The value associated with the key, or the default value if the key is not found. + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn get<'p>( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument<'p>, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let mut policy = inner.policy(); + + if let Some(x) = policy.get(py, &key)? 
{ + return Ok(x.value().clone_ref(py)); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok(x.unbind()), + utils::OptionalArgument::Undefined => unsafe { + // SAFETY: None is immortal, so reference counting has no meaning + Ok(pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind()) + }, + } + } + + fn __getitem__( + &self, + py: pyo3::Python, + key: alias::PyObject, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let mut policy = inner.policy(); + + match policy.get(py, &key)? { + Some(x) => Ok(x.value().clone_ref(py)), + None => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + /// Inserts key with a value of default if key is not in the cache. + /// + /// Returns the value for key if key is in the cache, else default. + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn setdefault( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult { + // 1. Try to get value + // 2. If exists -> return it + // 3. Else -> insert default -> return default + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let shared = inner.shared(); + let mut policy = inner.policy(); + + if let Some(x) = policy.get(py, &key)? { + return Ok(x.value().clone_ref(py)); + } + drop(policy); + + let default_object = match default { + utils::OptionalArgument::Defined(x) => x.unbind(), + utils::OptionalArgument::Undefined => unsafe { + // SAFETY: None is immortal, so reference counting has no meaning + pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind() + }, + }; + + let handle = rrpolicy::Handle::with_precomputed_hash_key( + py, + shared.getsizeof(), + key, + default_object.clone_ref(py), + )?; + + inner.insert(py, handle)?; + Ok(default_object) + } + + /// Removes specified key and returns the corresponding value. 
+ /// + /// If the key is not found, returns the `default` if given; otherwise, raises a KeyError. + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn pop( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + + if let Some(x) = inner.remove(py, &key)? { + return Ok(x.into_value()); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok(x.unbind()), + utils::OptionalArgument::Undefined => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + fn __delitem__(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult<()> { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + match inner.remove(py, &key)? { + Some(_) => Ok(()), + None => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + /// Remove and return a (key, value) pair as a 2-tuple. + /// + /// NOTE: the removed entry is chosen by the cache's eviction policy; raises `KeyError` if the cache is empty. + fn popitem(&self, py: pyo3::Python) -> pyo3::PyResult<(alias::PyObject, alias::PyObject)> { + let inner = self.0.get(); + let mut policy = inner.policy(); + + let handle = policy.evict(py, inner.shared())?; + drop(policy); + + let (key, val) = handle.into_pair(); + Ok((key.into(), val)) + } + + /// Calls the `popitem()` `n` times and returns count of removed items. + #[inline] + fn drain( + &self, + py: pyo3::Python, + n: pyo3::ffi::Py_ssize_t, + ) -> pyo3::PyResult { + let inner = self.0.get(); + inner.drain(py, n) + } + + /// Shrinks the internal allocation as close to the current length as possible. + #[inline] + fn shrink_to_fit(&self) { + let inner = self.0.get(); + let mut policy = inner.policy(); + policy.shrink_to_fit(inner.shared()); + } + + /// Removes all entries from the table and resets the cumulative size to zero. 
+ #[pyo3(signature=(*, reuse=false))] + fn clear(&self, reuse: bool) { + let inner = self.0.get(); + let shared = inner.shared(); + let mut policy = inner.policy(); + + policy.clear(shared); + + if !reuse { + policy.shrink_to_fit(shared); + } + } + + fn __eq__( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + other: pyo3::PyRef<'_, Self>, + ) -> pyo3::PyResult { + if std::ptr::eq(slf.as_ptr(), other.as_ptr()) { + return Ok(true); + } + + let self_inner = slf.0.get(); + let other_inner = other.0.get(); + + let self_policy = self_inner.policy(); + let other_policy = other_inner.policy(); + + self_policy.py_eq( + py, + self_inner.shared(), + &*other_policy, + other_inner.shared(), + ) + } + + fn __ne__( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + other: pyo3::PyRef<'_, Self>, + ) -> pyo3::PyResult { + if std::ptr::eq(slf.as_ptr(), other.as_ptr()) { + return Ok(false); + } + + let self_inner = slf.0.get(); + let other_inner = other.0.get(); + + let self_policy = self_inner.policy(); + let other_policy = other_inner.policy(); + + self_policy + .py_eq( + py, + self_inner.shared(), + &*other_policy, + other_inner.shared(), + ) + .map(|x| !x) + } + + fn items(&self, py: pyo3::Python) -> pyo3::PyResult> { + let inner = self.0.get(); + let gv = inner.shared().generation_version().clone(); + let initial_gv = gv.get(); + + // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] + let result = PyRRCacheItems { + iter: parking_lot::Mutex::new(unsafe { inner.policy().table().iter() }), + gv, + initial_gv, + }; + pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + } + + fn values(&self, py: pyo3::Python) -> pyo3::PyResult> { + let inner = self.0.get(); + let gv = inner.shared().generation_version().clone(); + let initial_gv = gv.get(); + + // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] + let result = PyRRCacheValues { + iter: parking_lot::Mutex::new(unsafe { 
inner.policy().table().iter() }), + gv, + initial_gv, + }; + pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + } + + fn keys(&self, py: pyo3::Python) -> pyo3::PyResult> { + let inner = self.0.get(); + let gv = inner.shared().generation_version().clone(); + let initial_gv = gv.get(); + + // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] + let result = PyRRCacheKeys { + iter: parking_lot::Mutex::new(unsafe { inner.policy().table().iter() }), + gv, + initial_gv, + }; + pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + } + + #[inline] + fn __iter__(&self, py: pyo3::Python) -> pyo3::PyResult> { + self.keys(py) + } + + fn copy(&self, py: pyo3::Python) -> pyo3::PyResult> { + let inner = self.0.get(); + let cloned = inner.clone_ref(py); + let result = Self(onceinit::OnceInit::new(cloned)); + + pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseCacheImpl)) + } + + #[inline] + fn __copy__(&self, py: pyo3::Python) -> pyo3::PyResult> { + self.copy(py) + } + + fn __repr__(slf: pyo3::PyRef<'_, Self>, py: pyo3::Python) -> String { + let inner = slf.0.get(); + let shared = inner.shared(); + let policy = inner.policy(); + + let iter = unsafe { + policy + .table() + .iter() + .map(|bucket| bucket.as_ref()) + .map(|handle| { + ( + // Without using `.bind` it returns something like `Py(addr)` + handle.key().as_ref().bind(py), + handle.value().bind(py), + ) + }) + }; + + let items = utils::items_to_str(iter, policy.table().len()).unwrap(); + format!( + "{}[{}/{}]({})", + unsafe { utils::get_type_name(py, slf.as_ptr()) }, + policy.current_size(), + shared.maxsize(), + items + ) + } + + #[inline] + fn random_key(&self, py: pyo3::Python) -> pyo3::PyResult { + let inner = self.0.get(); + let policy = inner.policy(); + + if policy.table().is_empty() { + Err(new_py_error!(PyKeyError, "cache is empty")) + } else { + let nth = fastrand::usize(0..policy.table().len()); + + let bucket = unsafe { 
policy.table().iter().nth(nth).unwrap_unchecked() }; + + let handle = unsafe { bucket.as_ref() }; + Ok(handle.key().clone_ref(py).into()) + } + } + + fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { + let inner = self.0.get(); + let policy = inner.policy(); + + for handle_ref in unsafe { policy.table().iter() } { + let handle = unsafe { handle_ref.as_ref() }; + + visit.call(handle.key().as_ref())?; + visit.call(handle.value())?; + } + Ok(()) + } + + fn __clear__(&self) { + let inner = self.0.get(); + let mut policy = inner.policy(); + policy.clear(inner.shared()); + } +} + +// Implement iterators +macro_rules! implement_iterator { + ( + $( + $name:ident as $pyname:literal + fn ($py:ident, $handle:ident) -> $rt_type:ty { $init:expr } + )+ + ) => { + $( + implement_pyclass! { + [extends=crate::pyclasses::base::PyBaseIteratorImpl, generic, frozen] + $name as $pyname { + initial_gv: u32, + gv: utils::GenerationVersion, + iter: parking_lot::Mutex>, + } + } + + #[pyo3::pymethods] + impl $name { + #[inline] + fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { + slf + } + + fn __next__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult<$rt_type> { + if slf.initial_gv != slf.gv.get() { + return Err(new_py_error!( + PyRuntimeError, + "cache size changed during iteration" + )); + } + + let mut iter = slf.iter.lock(); + + match iter.next() { + Some(x) => { + let $py = slf.py(); + let $handle = unsafe { x.as_ref() }; + Ok($init) + } + None => return Err(new_py_error!(PyStopIteration, ())), + } + } + } + )+ + }; +} +implement_iterator!( + PyRRCacheItems as "rrcache_items" + fn(py, handle) -> (alias::PyObject, alias::PyObject) {{ + let (key, val) = handle.clone_ref(py).into_pair(); + (key.into(), val) + }} + + PyRRCacheKeys as "rrcache_keys" + fn(py, handle) -> alias::PyObject { handle.key().clone_ref(py).into() } + + PyRRCacheValues as "rrcache_values" + fn(py, handle) -> alias::PyObject { handle.value().clone_ref(py) } +); diff 
--git a/tests/cache.py b/tests/cache.py deleted file mode 100644 index e52db21..0000000 --- a/tests/cache.py +++ /dev/null @@ -1,78 +0,0 @@ -import typing - -import pytest - -import cachebox - -from . import mixins - - -class TestCache( - mixins.InitializeMixin, - mixins.InsertAndGetMixin, - mixins.SetDefaultMixin, - mixins.PopAndDeleteMixin, - mixins.UpdateMixin, - mixins.IntrospectionMixin, - mixins.IterationMixin, - mixins.DrainClearShrinkMixin, - mixins.CopyMixin, - mixins.GetSizeOfMixin, - mixins.EdgeCasesMixin, - mixins.IssuesMixin, - mixins.FuzzyMixin, -): - def create_cache( - self, - maxsize: int = 10, - iterable: typing.Any = None, - capacity: int = 0, - getsizeof: typing.Any = None, - ) -> cachebox.BaseCacheImpl: - return cachebox.Cache(maxsize, iterable, capacity=capacity, getsizeof=getsizeof) - - def test_popitem_overflow_error(self): - cache = self.create_cache() - - # cachebox.Cache does not have any algorithm to use - with pytest.raises(OverflowError): - cache.popitem() - - def test_insert_overflow_error(self): - cache = self.create_cache(5) - - for i in range(5): - cache.insert(i, i) - - with pytest.raises(OverflowError): - cache.insert(6, 6) - - cache.insert(4, "A") # <- Replacing should be OK - - # Try again with custom getsizeof - cache = self.create_cache(5, getsizeof=lambda k, v: len(k)) - cache.insert("AA", 1) - cache.insert("BBB", 1) # <- Now is full - - assert cache.is_full() - - with pytest.raises(OverflowError): - cache.insert("NEW", 1) - - cache.insert("AA", "A") # <- Replacing should be OK - - def test_update_overflow_error(self): - with pytest.raises(OverflowError): - self.create_cache(5, {i: i for i in range(6)}) - - cache = self.create_cache(5) - cache.update({i: i for i in range(5)}) # <- Now is full - - with pytest.raises(OverflowError): - cache.insert(6, 6) - - with pytest.raises(OverflowError): - cache.update({10: 10}) - - # Replacing should be OK - cache.update({i: i for i in range(5)}) diff --git a/tests/mixins.py 
b/tests/mixins.py index c93aadb..1f7a2eb 100644 --- a/tests/mixins.py +++ b/tests/mixins.py @@ -147,6 +147,29 @@ class AType: assert cache[AType] is AType +class PopitemMixin(BaseMixin): + def test_popitem_raises_keyerror(self): + cache = self.create_cache() + + with pytest.raises(KeyError): + cache.popitem() + + def test_popitem_updates_currsize(self): + cache = self.create_cache(10, {i: i for i in range(20)}) + + assert cache.is_full() + assert cache.remaining_size() == 0 + assert cache.current_size() == 10 + assert len(cache) == 10 + + cache.popitem() + + assert not cache.is_full() + assert cache.remaining_size() == 1 + assert cache.current_size() == 9 + assert len(cache) == 9 + + class SetDefaultMixin(BaseMixin): def test_setdefault_inserts_when_absent(self): cache = self.create_cache() diff --git a/tests/fifocache.py b/tests/test_impls.py similarity index 76% rename from tests/fifocache.py rename to tests/test_impls.py index 8edd5a6..c32653e 100644 --- a/tests/fifocache.py +++ b/tests/test_impls.py @@ -7,9 +7,81 @@ from . 
import mixins +class TestCache( + mixins.InitializeMixin, + mixins.InsertAndGetMixin, + mixins.SetDefaultMixin, + mixins.PopAndDeleteMixin, + mixins.UpdateMixin, + mixins.IntrospectionMixin, + mixins.IterationMixin, + mixins.DrainClearShrinkMixin, + mixins.CopyMixin, + mixins.GetSizeOfMixin, + mixins.EdgeCasesMixin, + mixins.IssuesMixin, + mixins.FuzzyMixin, +): + def create_cache( + self, + maxsize: int = 10, + iterable: typing.Any = None, + capacity: int = 0, + getsizeof: typing.Any = None, + ) -> cachebox.BaseCacheImpl: + return cachebox.Cache(maxsize, iterable, capacity=capacity, getsizeof=getsizeof) + + def test_popitem_overflow_error(self): + cache = self.create_cache() + + # cachebox.Cache does not have any algorithm to use + with pytest.raises(OverflowError): + cache.popitem() + + def test_insert_overflow_error(self): + cache = self.create_cache(5) + + for i in range(5): + cache.insert(i, i) + + with pytest.raises(OverflowError): + cache.insert(6, 6) + + cache.insert(4, "A") # <- Replacing should be OK + + # Try again with custom getsizeof + cache = self.create_cache(5, getsizeof=lambda k, v: len(k)) + cache.insert("AA", 1) + cache.insert("BBB", 1) # <- Now is full + + assert cache.is_full() + + with pytest.raises(OverflowError): + cache.insert("NEW", 1) + + cache.insert("AA", "A") # <- Replacing should be OK + + def test_update_overflow_error(self): + with pytest.raises(OverflowError): + self.create_cache(5, {i: i for i in range(6)}) + + cache = self.create_cache(5) + cache.update({i: i for i in range(5)}) # <- Now is full + + with pytest.raises(OverflowError): + cache.insert(6, 6) + + with pytest.raises(OverflowError): + cache.update({10: 10}) + + # Replacing should be OK + cache.update({i: i for i in range(5)}) + + class TestFIFOCache( mixins.InitializeMixin, mixins.InsertAndGetMixin, + mixins.PopitemMixin, mixins.SetDefaultMixin, mixins.PopAndDeleteMixin, mixins.UpdateMixin, @@ -216,6 +288,7 @@ def test_len_never_exceeds_maxsize(self): cache = 
self.create_cache(5) for i in range(100): cache[i] = i + assert len(cache) <= 5 def test_clear_resets_fifo_order(self): @@ -287,3 +360,48 @@ def test_edge_case_of_front_offset_overflow(self): # popitem() must still yield the oldest entry oldest_key, oldest_val = cache.popitem() assert oldest_val == oldest_key * 10 + + +class TestRRCache( + mixins.InitializeMixin, + mixins.InsertAndGetMixin, + mixins.PopitemMixin, + mixins.SetDefaultMixin, + mixins.PopAndDeleteMixin, + mixins.UpdateMixin, + mixins.IntrospectionMixin, + mixins.IterationMixin, + mixins.DrainClearShrinkMixin, + mixins.CopyMixin, + mixins.GetSizeOfMixin, + mixins.EdgeCasesMixin, + mixins.IssuesMixin, + mixins.FuzzyMixin, +): + def create_cache( + self, + maxsize: int = 10, + iterable: typing.Any = None, + capacity: int = 0, + getsizeof: typing.Any = None, + ) -> cachebox.RRCache: + return cachebox.RRCache( + maxsize, + iterable, + capacity=capacity, + getsizeof=getsizeof, + ) + + def test_random_key_method(self): + cache = self.create_cache(10) + + with pytest.raises(KeyError): + cache.random_key() + + cache["a"] = 1 + assert cache.random_key() == "a" + + cache["b"] = 2 + cache["c"] = 3 + cache["d"] = 4 + assert cache.random_key() in ("a", "b", "c", "d") From 72650a0f3afa1b8572d7e78f339590b70039b071 Mon Sep 17 00:00:00 2001 From: awolverp Date: Sat, 23 May 2026 15:09:34 +0330 Subject: [PATCH 14/60] Refactor LRUCache --- Cargo.toml | 2 +- Makefile | 2 +- cachebox/__init__.py | 1 + cachebox/_core.pyi | 230 ++++++++++- src/internal/linked_list.rs | 453 ++++++++++++++++++++++ src/internal/mod.rs | 1 + src/lib.rs | 17 +- src/policies/common.rs | 79 ++++ src/policies/fifopolicy.rs | 93 +---- src/policies/lrupolicy.rs | 346 +++++++++++++++++ src/policies/mod.rs | 1 + src/policies/rrpolicy.rs | 2 + src/pyclasses/cache.rs | 3 +- src/pyclasses/fifocache.rs | 27 +- src/pyclasses/lrucache.rs | 733 ++++++++++++++++++++++++++++++++++++ src/pyclasses/mod.rs | 1 + src/pyclasses/rrcache.rs | 43 ++- tests/test_impls.py | 
237 +++++++++++- 18 files changed, 2163 insertions(+), 108 deletions(-) create mode 100644 src/internal/linked_list.rs create mode 100644 src/policies/lrupolicy.rs create mode 100644 src/pyclasses/lrucache.rs diff --git a/Cargo.toml b/Cargo.toml index 3c99da4..44a90a7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,7 +35,7 @@ pyo3-build-config = {version="0.28.3", default-features=false, features=["resolv default = ["inline-more", "extension-module"] inline-more = [] extension-module = ["pyo3/extension-module"] -fifocache-small-offset = [] +small-offset = [] [lints.clippy] dbg_macro = "warn" diff --git a/Makefile b/Makefile index 431394b..1b62907 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,7 @@ help: .PHONY: build-test build-test: - maturin develop --features "fifocache-small-offset" + maturin develop --features "small-offset" .PHONY: build-dev build-dev: diff --git a/cachebox/__init__.py b/cachebox/__init__.py index 2fa87b4..e50ab6f 100644 --- a/cachebox/__init__.py +++ b/cachebox/__init__.py @@ -1,6 +1,7 @@ from ._core import BaseCacheImpl as BaseCacheImpl from ._core import Cache as Cache from ._core import FIFOCache as FIFOCache +from ._core import LRUCache as LRUCache from ._core import RRCache as RRCache try: diff --git a/cachebox/_core.pyi b/cachebox/_core.pyi index e7726d8..81ae087 100644 --- a/cachebox/_core.pyi +++ b/cachebox/_core.pyi @@ -390,18 +390,65 @@ class FIFOCache(BaseCacheImpl[KT, VT]): def first(self, n: int = 0) -> typing.Optional[KT]: """ Returns the first key in cache; this is the one which will be removed by `popitem()` (if n == 0). - By using `n` parameter, you can browse order index by index. + + Raises `IndexError` if cache is empty, or `n` is out of range. """ ... def last(self) -> typing.Optional[KT]: """ Returns the last key in cache. Equals to `self.first(-1)`. + + Raises `IndexError` if cache is empty. """ ... 
class RRCache(BaseCacheImpl[KT, VT]): + """ + A thread-safe, memory-efficient key-value cache with Random Replacement eviction policy. + When the cache reaches its maximum size, an item is randomly selected and + evicted to make room for new entries. + + ## How It Works + `RRCache` is a configurable hashmap-like store with automatic eviction. When an item is inserted: + - It is stored directly without any ordering or priority tracking. + - If a maximum size is configured and the cache is full, a random entry is evicted to make room + for the new item. + - All read and write operations are thread-safe, making it safe for concurrent access without + external locking. + + The Random Replacement policy selects entries for eviction uniformly at random, ensuring fair + treatment across all cached items regardless of access patterns. + + ### Pros + - Low overhead: Random Replacement is computationally cheap compared to tracking access order or frequency. + - Thread-safe: safe for concurrent reads and writes out of the box. + - Configurable capacity: a hard size limit prevents unbounded memory growth while allowing new entries + through automatic eviction. + - No staleness issues: items persist only as long as they remain unselected by the eviction policy, + preventing indefinite accumulation of stale data. + + ### Cons + - Non-deterministic eviction: random selection means you cannot predict which entry will be removed, + potentially evicting recently cached or frequently accessed items. + - Unordered: insertion order is not preserved. + - Less optimal than LRU/LFU: for workloads with skewed access patterns, Random Replacement will + evict frequently used items more often than policy-aware caches. + + ## When to Use It + `RRCache` is the right choice when: + - You have a working set that can grow unpredictably and requires automatic memory management. 
+ - Access patterns are relatively uniform and predictable, so random eviction is not significantly + worse than smarter policies. + - You need low computational overhead and simple eviction logic. + - You want to prevent unbounded memory growth without the complexity of tracking usage metadata. + + Avoid it when you have highly skewed access patterns (where certain items are accessed far more + frequently than others), when cache hits are mission-critical and predictability matters, or when + you need fine-grained control over what gets evicted. + """ + def insert(self, key: KT, value: VT) -> typing.Optional[VT]: """ Equals to `self[key] = value`, but returns a value: @@ -486,3 +533,184 @@ class RRCache(BaseCacheImpl[KT, VT]): Raises `KeyError` If the cache is empty. """ ... + +class LRUCache(BaseCacheImpl[KT, VT]): + """ + A Least-Recently-Used (LRU) cache eviction policy: when the cache is full, + the item that has not been accessed for the longest time is removed first, + regardless of how many times it was accessed in the past. + + ## How It Works + The LRU algorithm is one of the most widely used cache eviction strategies in + practice. Items are tracked by their access recency—every time an item is read + or written, it becomes the most recently used. When the cache reaches capacity, + the least recently used item (the one that was accessed longest ago) is + evicted to make room for new entries. + + This implementation pairs a doubly-linked list with a hash map. The linked list + maintains items in access order: the most recently used item sits at the back, + and the least recently used at the front. The hash map stores pointers (cursors) + into this list, enabling O(1) key lookups. On every access—read or write—the + accessed item is moved to the back of the list, promoting it to "most recently used" + status. When eviction is needed, the front item is removed. 
+ + The doubly-linked list structure is critical: it permits O(1) removal and + reinsertion of any item anywhere in the ordering, without requiring a full rebuild + or index shifting. A running total tracks the current size of cached items, + allowing capacity checks in constant time. + + ### Pros + - **Excellent hit rates on temporal locality.** Workloads where recently or + frequently accessed items are likely to be needed again soon benefit dramatically + from LRU's recency-aware eviction. Real-world caches (CPU L1/L2, database + buffers, CDN edges) rely on this principle. + - **Insert, lookup, and evict are all O(1) amortized.** The doubly-linked list + and hash map combination guarantees no per-operation index shifting or traversals. + - **Automatic adaptation to access patterns.** Hot keys naturally migrate to the + back of the list and stay there, while cold keys drift toward eviction. No + manual tuning of weights or thresholds is needed. + - **Per-hit cost is minimal.** While LRU does require bookkeeping on reads (moving + an item to the back), this bookkeeping is O(1) and adds negligible overhead to most + workloads. + + ### Cons + - **Per-read overhead.** Every cache hit requires updating the linked list (removing + the item from its current position and reinserting it at the back), which is + measurably slower than FIFO's zero-cost hits on read-heavy workloads. + - **Burst traffic can skew eviction.** A single item accessed many times in rapid + succession will be kept alive indefinitely, even if other keys have better long-term + utility. Recency is a proxy for future use, not a guarantee. + - **Implementation complexity.** The doubly-linked list and cursor-based hash table add + internal complexity compared to simpler policies like FIFO. + - **Memory overhead.** Storing doubly-linked pointers (prev/next) for every cached item + consumes extra memory compared to array-based alternatives. 
+ + ## When to use it + Reach for `LRUCache` when: + - Your workload exhibits temporal locality—recently accessed items are likely to be + needed again soon. Databases, web caches, and CPU caches all exhibit this pattern. + - Hit rate is your primary metric. If maximizing the proportion of requests served + from the cache matters more than minimizing per-hit latency, LRU is typically the + best general-purpose choice. + - Access patterns are unknown or unpredictable. LRU's automatic adaptation makes it a safe + default when you cannot statically analyze what keys will be hot. + - You need a standard, battle-tested algorithm. LRU is the de facto eviction policy in most + production systems; it is well-understood, widely supported, and easy to reason about. + + Avoid it when: + - Your workload is write-heavy with few or no re-reads. FIFO's zero per-hit bookkeeping + will outperform LRU if the cache is rarely hit. + - You need sub-microsecond latency on every operation. The linked-list manipulation on each + read can add measurable overhead in ultra-low-latency systems. + - Access patterns are bimodal or exhibit frequency-heavy behavior (a small set of items is + accessed far more often than others). An LFU policy may deliver better hit rates in such cases. + """ + + def insert(self, key: KT, value: VT) -> typing.Optional[VT]: + """ + Equals to `self[key] = value`, but returns a value: + + - If the cache did not have this key present, None is returned. + - If the cache did have this key present, the value is updated, + and the old value is returned. The key is not updated, though. + + It's recommended to use this method instead of `self[key] = value`, as it keeps code + compatible across different cache policies. + """ + ... + + def update(self, iterable: _IterableType[KT, VT]) -> None: + """ + Updates the cache with elements from a dictionary or an iterable object of key/value pairs. + """ + ... 
+ + def get( + self, + key: KT, + default: typing.Optional[DT] = ..., + ) -> typing.Union[VT, DT]: + """ + Retrieves the value for a given key from the cache. + + Returns the value associated with the key if present, otherwise returns the specified default value. + Equivalent to `self[key]`, but provides a fallback default if the key is not found. + """ + ... + + def setdefault( + self, + key: KT, + default: typing.Optional[DT] = None, + ) -> typing.Optional[VT | DT]: + """ + Inserts key with a value of default if key is not in the cache. + + Returns the value for key if key is in the cache, else default. + """ + ... + + def popitem(self) -> typing.Tuple[KT, VT]: + """ + Removes the least recently used item from the cache and returns it as a (key, value) tuple. + Raises KeyError if the cache is empty. + """ + ... + + def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: + """ + Returns an iterable object of the cache's items (key-value pairs). + + Notes: + - You should not make any changes in cache while using this iterable object. + - Items are ordered. + """ + ... + + def keys(self) -> typing.Iterable[KT]: + """ + Returns an iterable object of the cache's keys. + + Notes: + - You should not make any changes in cache while using this iterable object. + - Keys are ordered. + """ + ... + + def values(self) -> typing.Iterable[VT]: + """ + Returns an iterable object of the cache's values. + + Notes: + - You should not make any changes in cache while using this iterable object. + - Values are ordered. + """ + ... + + def peek( + self, + key: KT, + default: typing.Optional[DT] = ..., + ) -> typing.Union[VT, DT]: + """ + Retrieves the value for a given key from the cache (without promoting the key). + + Returns the value associated with the key if present, otherwise returns the specified default value. + Equivalent to `self[key]`, but provides a fallback default if the key is not found. 
+ """ + + def least_recently_used(self) -> typing.Optional[KT]: + """ + Returns the key in the cache that has not been accessed in the longest time. + + Raises `KeyError` if cache is empty. + """ + ... + + def most_recently_used(self) -> typing.Optional[KT]: + """ + Returns the key in the cache that has been accessed in the shortest time. + + Raises `KeyError` if cache is empty. + """ + ... diff --git a/src/internal/linked_list.rs b/src/internal/linked_list.rs new file mode 100644 index 0000000..d7386ed --- /dev/null +++ b/src/internal/linked_list.rs @@ -0,0 +1,453 @@ +use std::marker::PhantomData; +use std::mem; +use std::ptr::NonNull; + +pub struct Node { + next: Option>>, + prev: Option>>, + element: T, +} + +impl Node { + fn new(element: T) -> Self { + Node { + next: None, + prev: None, + element, + } + } + + fn into_element(self: Box) -> T { + self.element + } + + pub fn element<'a>(&'a self) -> &'a T { + &self.element + } +} + +/// A doubly-linked list with owned nodes. +/// +/// The `LinkedList` allows pushing and popping elements at either end +/// in constant time. +pub struct LinkedList { + head: Option>>, + tail: Option>>, + len: usize, + marker: PhantomData>>, +} + +// private methods +impl LinkedList { + /// Adds the given node to the front of the list. + /// + /// # Safety + /// `node` must point to a valid node that was boxed and leaked using the list's allocator. + /// This method takes ownership of the node, so the pointer should not be used again. + #[inline] + unsafe fn push_front_node(&mut self, node: NonNull>) { + // This method takes care not to create mutable references to whole nodes, + // to maintain validity of aliasing pointers into `element`. + unsafe { + (*node.as_ptr()).next = self.head; + (*node.as_ptr()).prev = None; + let node = Some(node); + + match self.head { + None => self.tail = node, + // Not creating new mutable (unique!) references overlapping `element`. 
+ Some(head) => (*head.as_ptr()).prev = node, + } + + self.head = node; + self.len += 1; + } + } + + /// Removes and returns the node at the front of the list. + #[inline] + fn pop_front_node(&mut self) -> Option>> { + // This method takes care not to create mutable references to whole nodes, + // to maintain validity of aliasing pointers into `element`. + self.head.map(|node| unsafe { + let node = Box::from_raw(node.as_ptr()); + self.head = node.next; + + match self.head { + None => self.tail = None, + // Not creating new mutable (unique!) references overlapping `element`. + Some(head) => (*head.as_ptr()).prev = None, + } + + self.len -= 1; + node + }) + } + + /// Adds the given node to the back of the list. + /// + /// # Safety + /// `node` must point to a valid node that was boxed and leaked using the list's allocator. + /// This method takes ownership of the node, so the pointer should not be used again. + #[inline] + unsafe fn push_back_node(&mut self, node: NonNull>) { + // This method takes care not to create mutable references to whole nodes, + // to maintain validity of aliasing pointers into `element`. + unsafe { + (*node.as_ptr()).next = None; + (*node.as_ptr()).prev = self.tail; + let node = Some(node); + + match self.tail { + None => self.head = node, + // Not creating new mutable (unique!) references overlapping `element`. + Some(tail) => (*tail.as_ptr()).next = node, + } + + self.tail = node; + self.len += 1; + } + } + + /// Removes and returns the node at the back of the list. + #[inline] + fn pop_back_node(&mut self) -> Option>> { + // This method takes care not to create mutable references to whole nodes, + // to maintain validity of aliasing pointers into `element`. + self.tail.map(|node| unsafe { + let node = Box::from_raw(node.as_ptr()); + self.tail = node.prev; + + match self.tail { + None => self.head = None, + // Not creating new mutable (unique!) references overlapping `element`. 
+ Some(tail) => (*tail.as_ptr()).next = None, + } + + self.len -= 1; + node + }) + } + + /// Unlinks the specified node from the current list. + /// + /// Warning: this will not check that the provided node belongs to the current list. + /// + /// This method takes care not to create mutable references to `element`, to + /// maintain validity of aliasing pointers. + #[inline] + unsafe fn unlink_node(&mut self, mut node: NonNull>) { + let node = unsafe { node.as_mut() }; // this one is ours now, we can create an &mut. + + // Not creating new mutable (unique!) references overlapping `element`. + match node.prev { + Some(prev) => unsafe { (*prev.as_ptr()).next = node.next }, + // this node is the head node + None => self.head = node.next, + }; + + match node.next { + Some(next) => unsafe { (*next.as_ptr()).prev = node.prev }, + // this node is the tail node + None => self.tail = node.prev, + }; + + self.len -= 1; + } + + /// Unlinks the specified node from the current list and returns the item. + /// + /// # Safety + /// This will not check that the provided node belongs to the current list. + unsafe fn remove_node(&mut self, node: NonNull>) -> T { + unsafe { + self.unlink_node(node); + let node = Box::from_raw(node.as_ptr()); + node.element + } + } +} + +impl Default for LinkedList { + /// Creates an empty `LinkedList`. + #[inline] + fn default() -> Self { + Self::new() + } +} + +impl LinkedList { + /// Creates an empty `LinkedList`. + #[inline] + #[must_use] + pub const fn new() -> Self { + LinkedList { + head: None, + tail: None, + len: 0, + marker: PhantomData, + } + } + + /// Returns `true` if the `LinkedList` is empty. + /// + /// This operation should compute in *O*(1) time. + #[inline] + #[must_use] + pub fn is_empty(&self) -> bool { + self.head.is_none() + } + + /// Returns the length of the `LinkedList`. + /// + /// This operation should compute in *O*(1) time. 
+ #[inline] + #[must_use] + pub fn len(&self) -> usize { + self.len + } + + /// Removes all elements from the `LinkedList`. + /// + /// This operation should compute in *O*(*n*) time. + #[inline] + pub fn clear(&mut self) { + drop(LinkedList { + head: self.head.take(), + tail: self.tail.take(), + len: mem::take(&mut self.len), + marker: PhantomData, + }); + } + + /// Returns a [`Cursor`] to the front node, or `None` if the list is empty. + #[inline] + #[must_use] + pub fn cursor_front(&self) -> Option> { + self.head.map(Cursor::new) + } + + /// Returns a [`Cursor`] to the back node, or `None` if the list is empty. + #[inline] + #[must_use] + pub fn cursor_back(&self) -> Option> { + self.tail.map(Cursor::new) + } + + /// Adds an element to the front of the list and returns a [`Cursor`] to it. + /// + /// This operation should compute in *O*(1) time. + #[inline] + pub fn push_front(&mut self, elt: T) -> Cursor { + let node = Box::new(Node::new(elt)); + let node_ptr = NonNull::from(Box::leak(node)); + + // SAFETY: node_ptr is a unique pointer to a node we just boxed with `Box::new` and leaked + unsafe { + self.push_front_node(node_ptr); + } + Cursor::new(node_ptr) + } + + /// Removes the first element and returns it, or `None` if the list is + /// empty. + /// + /// This operation should compute in *O*(1) time. + #[inline] + pub fn pop_front(&mut self) -> Option { + self.pop_front_node().map(Node::into_element) + } + + /// Adds an element to the back of the list and returns a [`Cursor`] to it. + /// + /// This operation should compute in *O*(1) time. + #[inline] + pub fn push_back(&mut self, elt: T) -> Cursor { + let node = Box::new(Node::new(elt)); + let node_ptr = NonNull::from(Box::leak(node)); + + // SAFETY: node_ptr is a unique pointer to a node we just boxed with `Box::new` and leaked + unsafe { + self.push_back_node(node_ptr); + } + Cursor::new(node_ptr) + } + + /// Removes the last element from a list and returns it, or `None` if + /// it is empty. 
+ /// + /// This operation should compute in *O*(1) time. + #[inline] + pub fn pop_back(&mut self) -> Option { + self.pop_back_node().map(Node::into_element) + } + + /// Returns a raw, lifetime-free iterator over the nodes of a LinkedList. + /// + /// # Safety + /// The iterator must not outlive the list it was created from, and the list must not be structurally modified. + pub unsafe fn iter(&self) -> RawIter { + RawIter { + head: self.head, + len: self.len, + } + } +} + +unsafe impl<#[may_dangle] T> Drop for LinkedList { + fn drop(&mut self) { + struct DropGuard<'a, T>(&'a mut LinkedList); + + impl<'a, T> Drop for DropGuard<'a, T> { + fn drop(&mut self) { + // Continue the same loop we do below. This only runs when a destructor has + // panicked. If another one panics this will abort. + while self.0.pop_front_node().is_some() {} + } + } + + // Wrap self so that if a destructor panics, we can try to keep looping + let guard = DropGuard(self); + while guard.0.pop_front_node().is_some() {} + mem::forget(guard); + } +} + +/// An opaque handle to a node in a [`LinkedList`]. +/// +/// Obtained via [`LinkedList::push_front`], [`LinkedList::push_back`], +/// [`LinkedList::cursor_front`], or [`LinkedList::cursor_back`]. +/// +/// `Cursor` is `Copy`; cloning or copying it produces a second handle to the +/// *same* node. Two cursors compare equal iff they point at the same node. +/// +/// # Safety invariant +/// Every `unsafe` method on `Cursor` requires that: +/// - the cursor was obtained from the list it is passed to, **and** +/// - the node has not yet been removed from that list. +/// +/// Violating either condition is undefined behaviour. +#[repr(transparent)] +pub struct Cursor(NonNull>); + +// `NonNull>` is just a pointer; copying it is always safe. +impl Clone for Cursor { + #[inline] + fn clone(&self) -> Self { + *self + } +} +impl Copy for Cursor {} + +// Pointer equality: two cursors are equal if they point at the same node. 
+impl PartialEq for Cursor { + #[inline] + fn eq(&self, other: &Self) -> bool { + self.0 == other.0 + } +} +impl Eq for Cursor {} + +impl Cursor { + #[inline] + fn new(node: NonNull>) -> Self { + Cursor(node) + } + + /// Returns a shared reference to the element this cursor points to. + /// + /// # Safety + /// See the [struct-level safety invariant](Cursor). + /// The returned reference borrows for `'a`, which the caller must + /// ensure does not outlive the node or the list. + #[inline] + pub unsafe fn element<'a>(&self) -> &'a T { + &(*self.0.as_ptr()).element + } + + /// Returns a mutable reference to the element this cursor points to. + /// + /// # Safety + /// See the [struct-level safety invariant](Cursor). + /// In addition, no other reference to this element may exist for the + /// duration of the returned `'a` borrow. + #[inline] + pub unsafe fn element_mut<'a>(&mut self) -> &'a mut T { + &mut (*self.0.as_ptr()).element + } + + /// Moves this node to the front of `list`. + /// + /// # Safety + /// See the [struct-level safety invariant](Cursor). + #[inline] + pub unsafe fn move_to_front(self, list: &mut LinkedList) { + list.unlink_node(self.0); + list.push_front_node(self.0); + } + + /// Moves this node to the back of `list`. + /// + /// # Safety + /// See the [struct-level safety invariant](Cursor). + #[inline] + pub unsafe fn move_to_back(self, list: &mut LinkedList) { + list.unlink_node(self.0); + list.push_back_node(self.0); + } + + /// Unlinks this node from `list` and returns its element. + /// + /// Consumes the cursor so it cannot be used after removal. + /// + /// # Safety + /// See the [struct-level safety invariant](Cursor). + #[inline] + pub unsafe fn unlink(self, list: &mut LinkedList) -> T { + list.remove_node(self.0) + } +} + +/// A raw, lifetime-free iterator over the nodes of a [`LinkedList`]. +/// +/// Yields a [`Cursor`] for each node, from front to back. +/// +/// Obtained via [`LinkedList::iter`]. 
+/// +/// # Safety invariant +/// The iterator must not outlive the list it was created from, and the list +/// must not be structurally modified (nodes added or removed) while iterating. +/// Violating either condition is undefined behaviour. +pub struct RawIter { + head: Option>>, + len: usize, +} + +impl Iterator for RawIter { + type Item = Cursor; + + #[inline] + fn next(&mut self) -> Option> { + if self.len == 0 { + return None; + } + self.head.map(|node| { + self.len -= 1; + // SAFETY: node is a valid, live pointer for as long as the list lives. + self.head = unsafe { (*node.as_ptr()).next }; + Cursor::new(node) + }) + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + (self.len, Some(self.len)) + } +} + +unsafe impl Send for LinkedList {} +unsafe impl Sync for LinkedList {} +unsafe impl Send for RawIter {} +unsafe impl Sync for RawIter {} +unsafe impl Send for Cursor {} +unsafe impl Sync for Cursor {} diff --git a/src/internal/mod.rs b/src/internal/mod.rs index ffb56f2..852e7d8 100644 --- a/src/internal/mod.rs +++ b/src/internal/mod.rs @@ -1,4 +1,5 @@ pub mod alias; +pub mod linked_list; pub mod onceinit; pub mod pickle; pub mod utils; diff --git a/src/lib.rs b/src/lib.rs index 4fc4896..fe5f01f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,12 +11,6 @@ pub mod internal; pub mod policies; pub mod pyclasses; -// fn _fifocache_small_offset_attribute( -// m: &pyo3::Bound<'_, pyo3::types::PyModule>, -// ) -> pyo3::PyResult<()> { - -// } - #[pyo3::pymodule] mod _core { #[allow(unused_imports)] @@ -56,11 +50,20 @@ mod _core { #[pymodule_export] use crate::pyclasses::rrcache::PyRRCacheValues; + #[pymodule_export] + use crate::pyclasses::lrucache::PyLRUCache; + #[pymodule_export] + use crate::pyclasses::lrucache::PyLRUCacheItems; + #[pymodule_export] + use crate::pyclasses::lrucache::PyLRUCacheKeys; + #[pymodule_export] + use crate::pyclasses::lrucache::PyLRUCacheValues; + #[pymodule_init] pub fn init(m: &pyo3::Bound<'_, pyo3::types::PyModule>) -> 
pyo3::PyResult<()> { typeref::initialize_typeref(m.py()); - #[cfg(feature = "fifocache-small-offset")] + #[cfg(feature = "small-offset")] m.add("_fifocache_small_offset", true)?; Ok(()) diff --git a/src/policies/common.rs b/src/policies/common.rs index ecfc9a6..7d35b8b 100644 --- a/src/policies/common.rs +++ b/src/policies/common.rs @@ -144,3 +144,82 @@ impl traits::SharedExt for Shared { } } } + +/// Immutable slice iterator without lifetime +/// +/// # Safety +/// - You should be sure about lifetimes, and pointers should be alive while this type is alive. +/// Any changes to pointers can cause *Undefined Behaviour*. +/// - It doesn't support `ZST`s. +struct RawSliceIter { + pointer: std::ptr::NonNull, + index: usize, + len: usize, +} + +impl RawSliceIter { + /// Creates a new [`RawSliceIter`] + #[inline] + fn new(slice: &[T]) -> Self { + let pointer: std::ptr::NonNull = std::ptr::NonNull::from(slice).cast(); + + Self { + pointer, + index: 0, + len: slice.len(), + } + } +} + +impl Iterator for RawSliceIter { + type Item = std::ptr::NonNull; + + #[inline] + fn next(&mut self) -> Option { + if self.index >= self.len { + None + } else { + let value = unsafe { self.pointer.add(self.index) }; + self.index += 1; + Some(value) + } + } +} + +unsafe impl Send for RawSliceIter {} +unsafe impl Sync for RawSliceIter {} + +/// Raw iterator for [`VecDeque`] which doesn't have lifetime. +/// +/// # Safety +/// You should track changes of [`VecDeque`] yourself. 
+pub struct RawVecDequeIter { + first: RawSliceIter, + second: RawSliceIter, +} + +impl RawVecDequeIter { + /// Creates a new [`RawVecDequeIter`] + #[inline] + pub fn new(first: &[T], second: &[T]) -> Self { + Self { + first: RawSliceIter::new(first), + second: RawSliceIter::new(second), + } + } +} + +impl Iterator for RawVecDequeIter { + type Item = std::ptr::NonNull; + + #[inline] + fn next(&mut self) -> Option { + match self.first.next() { + Some(val) => Some(val), + None => { + std::mem::swap(&mut self.first, &mut self.second); + self.first.next() + } + } + } +} diff --git a/src/policies/fifopolicy.rs b/src/policies/fifopolicy.rs index e6f94f1..748204e 100644 --- a/src/policies/fifopolicy.rs +++ b/src/policies/fifopolicy.rs @@ -2,6 +2,7 @@ use std::collections::VecDeque; use crate::hashbrown; use crate::internal::utils; +use crate::policies::common::RawVecDequeIter; use crate::policies::traits; use crate::policies::traits::HandleExt; use crate::policies::traits::PolicyExt; @@ -24,6 +25,7 @@ impl traits::EntryExt for Occupied<'_> { type Handle = Handle; type Shared = Shared; + #[inline] fn would_exceed(&self, extra_size: usize) -> bool { let handle = unsafe { &self.policy.entries[*self.bucket.as_ref() - self.policy.front_offset] }; @@ -35,12 +37,14 @@ impl traits::EntryExt for Occupied<'_> { > self.shared.maxsize() } + #[inline] fn evict(&mut self, py: pyo3::Python) -> pyo3::PyResult { self.policy.evict(py, self.shared) } } impl traits::OccupiedExt for Occupied<'_> { + #[inline] fn replace(self, new: Self::Handle) -> Self::Handle { // In update we don't need to increment this; because this does not change the memory address ranges // self.shared.generation_version().increment(); @@ -57,6 +61,7 @@ impl traits::OccupiedExt for Occupied<'_> { std::mem::replace(item, new) } + #[inline] fn remove(self) -> Self::Handle { let (mut index, _) = unsafe { self.policy.table.remove(self.bucket) }; index -= self.policy.front_offset; @@ -82,10 +87,12 @@ impl traits::EntryExt 
for Vacant<'_> { type Handle = Handle; type Shared = Shared; + #[inline] fn would_exceed(&self, extra_size: usize) -> bool { self.policy.currsize.saturating_add(extra_size) > self.shared.maxsize() } + #[inline] fn evict(&mut self, py: pyo3::Python) -> pyo3::PyResult { self.policy.evict(py, self.shared) } @@ -110,85 +117,6 @@ impl traits::VacantExt for Vacant<'_> { } } -/// Immutable slice iterator without lifetime -/// -/// # Safety -/// - You should be sure about lifetimes, and pointers should be alive while this type is alive. -/// Any changes to pointers can cause *Undefined Behaviour*. -/// - It doesn't support `ZST`s. -struct RawSliceIter { - pointer: std::ptr::NonNull, - index: usize, - len: usize, -} - -impl RawSliceIter { - /// Creates a new [`RawSliceIter`] - #[inline] - fn new(slice: &[T]) -> Self { - let pointer: std::ptr::NonNull = std::ptr::NonNull::from(slice).cast(); - - Self { - pointer, - index: 0, - len: slice.len(), - } - } -} - -impl Iterator for RawSliceIter { - type Item = std::ptr::NonNull; - - #[inline] - fn next(&mut self) -> Option { - if self.index >= self.len { - None - } else { - let value = unsafe { self.pointer.add(self.index) }; - self.index += 1; - Some(value) - } - } -} - -unsafe impl Send for RawSliceIter {} -unsafe impl Sync for RawSliceIter {} - -/// Raw iterator for [`VecDeque`] which doesn't have lifetime. -/// -/// # Safety -/// You should track changes of [`VecDeque`] yourself. 
-pub struct RawVecDequeIter { - first: RawSliceIter, - second: RawSliceIter, -} - -impl RawVecDequeIter { - /// Creates a new [`RawVecDequeIter`] - #[inline] - fn new(first: &[T], second: &[T]) -> Self { - Self { - first: RawSliceIter::new(first), - second: RawSliceIter::new(second), - } - } -} - -impl Iterator for RawVecDequeIter { - type Item = std::ptr::NonNull; - - #[inline] - fn next(&mut self) -> Option { - match self.first.next() { - Some(val) => Some(val), - None => { - std::mem::swap(&mut self.first, &mut self.second); - self.first.next() - } - } - } -} - pub struct FIFOPolicy { /// Maps each key to its logical index into [`FIFOPolicy::entries`], enabling O(1) lookups. /// @@ -246,10 +174,10 @@ impl FIFOPolicy { #[inline] fn decrement_indexes(&mut self, start: usize, end: usize) { - #[cfg(not(feature = "fifocache-small-offset"))] + #[cfg(not(feature = "small-offset"))] const MAX_FRONT_OFFSET: usize = usize::MAX - isize::MAX as usize; - #[cfg(feature = "fifocache-small-offset")] + #[cfg(feature = "small-offset")] const MAX_FRONT_OFFSET: usize = u8::MAX as usize; // Fast path: shifting the entire front is a single counter increment. 
@@ -313,7 +241,7 @@ impl FIFOPolicy { } } -impl traits::PolicyExt for FIFOPolicy { +impl PolicyExt for FIFOPolicy { type Shared = Shared; type Handle = Handle; @@ -379,7 +307,6 @@ impl traits::PolicyExt for FIFOPolicy { } } - #[inline] fn evict(&mut self, py: pyo3::Python, shared: &Self::Shared) -> pyo3::PyResult { let front = self.entries.front(); if front.is_none() { diff --git a/src/policies/lrupolicy.rs b/src/policies/lrupolicy.rs new file mode 100644 index 0000000..f07c345 --- /dev/null +++ b/src/policies/lrupolicy.rs @@ -0,0 +1,346 @@ +use crate::hashbrown; +use crate::internal::linked_list; +use crate::internal::utils; +use crate::policies::traits; +use crate::policies::traits::HandleExt; +use crate::policies::traits::PolicyExt; +use crate::policies::traits::SharedExt; + +pub use super::common::Handle; +pub use super::common::Shared; + +/// A view into an occupied entry in [`LRUPolicy`]. +pub struct Occupied<'a> { + /// The parent storage that owns the hash table. + policy: &'a mut LRUPolicy, + /// The shared configuration + shared: &'a Shared, + /// Raw bucket pointing to the occupied index. 
+ bucket: hashbrown::raw::Bucket>, +} + +impl traits::EntryExt for Occupied<'_> { + type Handle = Handle; + type Shared = Shared; + + #[inline] + fn would_exceed(&self, extra_size: usize) -> bool { + let handle = unsafe { self.bucket.as_ref().element() }; + + self.policy + .currsize + .saturating_add(extra_size) + .saturating_sub(handle.size()) + > self.shared.maxsize() + } + + #[inline] + fn evict(&mut self, py: pyo3::Python) -> pyo3::PyResult { + self.policy.evict(py, self.shared) + } +} + +impl traits::OccupiedExt for Occupied<'_> { + fn replace(self, new: Self::Handle) -> Self::Handle { + self.shared.generation_version().increment(); + + unsafe { + let mut cursor = *self.bucket.as_ref(); + + self.policy.currsize = self + .policy + .currsize + .saturating_sub(cursor.element().size()) + .saturating_add(new.size()); + + let old = std::mem::replace(cursor.element_mut(), new); + cursor.move_to_back(&mut self.policy.entries); + + old + } + } + + #[inline] + fn remove(self) -> Self::Handle { + self.shared.generation_version().increment(); + + let (cursor, _) = unsafe { self.policy.table.remove(self.bucket) }; + let item = unsafe { cursor.unlink(&mut self.policy.entries) }; + + self.policy.currsize = self.policy.currsize.saturating_sub(item.size()); + item + } +} + +/// A view into a vacant slot in [`LRUPolicy`]. +pub struct Vacant<'a> { + /// The parent policy that owns the hash table. 
+ policy: &'a mut LRUPolicy, + /// The shared configuration + shared: &'a Shared, +} + +impl traits::EntryExt for Vacant<'_> { + type Handle = Handle; + type Shared = Shared; + + #[inline] + fn would_exceed(&self, extra_size: usize) -> bool { + self.policy.currsize.saturating_add(extra_size) > self.shared.maxsize() + } + + #[inline] + fn evict(&mut self, py: pyo3::Python) -> pyo3::PyResult { + self.policy.evict(py, self.shared) + } +} + +impl traits::VacantExt for Vacant<'_> { + fn insert(self, handle: Self::Handle) { + self.shared.generation_version().increment(); + + self.policy.currsize = self.policy.currsize.saturating_add(handle.size()); + + let hash = handle.key().hash(); + let cursor = self.policy.entries.push_back(handle); + + self.policy + .table + .insert(hash, cursor, |x| unsafe { x.element().key().hash() }); + } +} + +pub struct LRUPolicy { + /// Maps each key to its node pointer into [`LRUPolicy::entries`], enabling O(1) lookups. + table: hashbrown::raw::RawTable>, + + /// A doubly-linked list, which holds cached handles, providing O(1) pops (front/back) and pushes (front/back). + entries: linked_list::LinkedList, + + /// Running total of all stored handles' sizes, maintained incrementally. + currsize: usize, +} + +impl LRUPolicy { + /// Creates a new [`LRUPolicy`]. + /// + /// The underlying hash map is pre-allocated to hold at least `capacity` entries + /// without reallocation. 
+ pub fn new(capacity: usize) -> Self { + Self { + table: hashbrown::raw::RawTable::with_capacity(capacity), + entries: linked_list::LinkedList::new(), + currsize: 0, + } + } + + #[inline] + pub fn table(&self) -> &hashbrown::raw::RawTable> { + &self.table + } + + #[inline] + pub fn linked_list(&self) -> &linked_list::LinkedList { + &self.entries + } + + #[inline] + pub fn peek( + &self, + py: pyo3::Python, + key: &utils::PrecomputedHashObject, + ) -> pyo3::PyResult> { + unsafe { + let bucket = self + .table + .find(key.hash(), |cursor| key.py_eq(py, cursor.element().key()))?; + + Ok(bucket.map(|x| x.as_ref().element())) + } + } +} + +impl PolicyExt for LRUPolicy { + type Shared = Shared; + type Handle = Handle; + + type Occupied<'a> + = Occupied<'a> + where + Self: 'a; + + type Vacant<'a> + = Vacant<'a> + where + Self: 'a; + + #[inline] + fn current_size(&self) -> usize { + self.currsize + } + + #[inline] + fn get( + &mut self, + py: pyo3::Python, + key: &::Key, + ) -> pyo3::PyResult> { + unsafe { + let bucket = self + .table + .find(key.hash(), |cursor| key.py_eq(py, cursor.element().key()))?; + + match bucket { + Some(cursor) => { + cursor.as_mut().move_to_back(&mut self.entries); + Ok(Some(cursor.as_ref().element())) + } + None => Ok(None), + } + } + } + + fn entry<'a>( + &'a mut self, + py: pyo3::Python, + key: &::Key, + shared: &'a Self::Shared, + ) -> pyo3::PyResult, Self::Vacant<'a>>> { + let eq = + |cursor: &linked_list::Cursor| unsafe { key.py_eq(py, cursor.element().key()) }; + + match self.table.find(key.hash(), eq)? 
{ + Some(bucket) => { + let result = Occupied { + policy: self, + shared, + bucket, + }; + Ok(traits::PolicyEntry::Occupied(result)) + } + None => { + let result = Vacant { + policy: self, + shared, + }; + Ok(traits::PolicyEntry::Vacant(result)) + } + } + } + + fn evict(&mut self, _py: pyo3::Python, shared: &Self::Shared) -> pyo3::PyResult { + { + let front_cursor = match self.entries.cursor_front() { + Some(x) => x, + None => return Err(new_py_error!(PyKeyError, "cache is empty")), + }; + + let hash = unsafe { front_cursor.element().key().hash() }; + + shared.generation_version().increment(); + self.table + .remove_entry(hash, |cursor| Ok::<_, pyo3::PyErr>(*cursor == front_cursor)) + .expect("evict: key not found in table."); + } + + let handle = unsafe { self.entries.pop_front().unwrap_unchecked() }; + self.currsize = self.currsize.saturating_sub(handle.size()); + Ok(handle) + } + + #[inline] + fn shrink_to_fit(&mut self, shared: &Self::Shared) { + let initial = self.table.capacity(); + self.table + .shrink_to(0, |cursor| unsafe { cursor.element().key().hash() }); + + if initial != self.table.capacity() { + shared.generation_version().increment(); + } + } + + #[inline] + fn clear(&mut self, shared: &Self::Shared) { + if self.entries.is_empty() { + return; + } + + shared.generation_version().increment(); + self.table.clear_no_drop(); + self.entries.clear(); + self.currsize = 0; + } + + fn py_eq( + &self, + py: pyo3::Python, + shared: &Self::Shared, + other: &Self, + other_shared: &Self::Shared, + ) -> pyo3::PyResult { + if shared.maxsize() != other_shared.maxsize() || self.table.len() != other.table.len() { + return Ok(false); + } + + let mut error = None; + let result = unsafe { + let mut iterator = self.table.iter().map(|x| x.as_ref()); + + iterator.all(|cursor_1| { + let handle_1 = cursor_1.element(); + + let result = other.table.get(handle_1.key().hash(), |cursor| { + handle_1.key().py_eq(py, cursor.element().key()) + }); + + match result { + Err(e) => { + 
error = Some(e); + // Return false to break the `.all` loop + false + } + Ok(None) => false, + Ok(Some(cursor_2)) => { + let handle_2 = cursor_2.element(); + + let value_1 = handle_1.value(); + let value_2 = handle_2.value(); + + match utils::pyobject_equal(py, value_1.as_ptr(), value_2.as_ptr()) { + Ok(result) => result, + Err(e) => { + error = Some(e); + // Return false to break the `.all` loop + false + } + } + } + } + }) + }; + + if let Some(error) = error { + return Err(error); + } + Ok(result) + } + + fn clone_ref(&self, py: pyo3::Python<'_>) -> Self { + let mut table = hashbrown::raw::RawTable::with_capacity(self.entries.len()); + let mut entries = linked_list::LinkedList::new(); + + unsafe { + for cursor in self.entries.iter() { + let cloned_handle = cursor.element().clone_ref(py); + let new_cursor = entries.push_back(cloned_handle); + table.insert_no_grow(new_cursor.element().key().hash(), new_cursor); + } + } + + Self { + table, + entries, + currsize: self.currsize, + } + } +} diff --git a/src/policies/mod.rs b/src/policies/mod.rs index b7361e9..d8c6784 100644 --- a/src/policies/mod.rs +++ b/src/policies/mod.rs @@ -2,6 +2,7 @@ pub mod common; pub mod traits; pub mod fifopolicy; +pub mod lrupolicy; pub mod nopolicy; pub mod rrpolicy; diff --git a/src/policies/rrpolicy.rs b/src/policies/rrpolicy.rs index 668fbb7..175229a 100644 --- a/src/policies/rrpolicy.rs +++ b/src/policies/rrpolicy.rs @@ -40,6 +40,7 @@ impl traits::EntryExt for Occupied<'_> { } impl traits::OccupiedExt for Occupied<'_> { + #[inline] fn remove(self) -> Self::Handle { self.shared.generation_version().increment(); @@ -48,6 +49,7 @@ impl traits::OccupiedExt for Occupied<'_> { h } + #[inline] fn replace(self, new: Self::Handle) -> Self::Handle { self.policy.currsize = self.policy.currsize.saturating_add(new.size()); let old = unsafe { std::mem::replace(self.bucket.as_mut(), new) }; diff --git a/src/pyclasses/cache.rs b/src/pyclasses/cache.rs index fb6bcdd..3ea4096 100644 --- 
a/src/pyclasses/cache.rs +++ b/src/pyclasses/cache.rs @@ -15,7 +15,6 @@ implement_pyclass! { /// `Cache` is essentially a configurable hashmap-like store. When an item is inserted: /// - It is stored directly without any ordering, priority tracking, or access metadata. /// - If a maximum size is configured, insertions beyond that limit are rejected (raises OverflowError). - /// A max size of zero means unlimited. /// - All read and write operations are thread-safe, making it safe for concurrent access without /// external locking. /// @@ -67,7 +66,7 @@ impl PyCache { /// Initialize a new `Cache` instance. /// /// Args: - /// maxsize: Maximum number of elements the cache can hold. Zero means unlimited. + /// maxsize: Maximum number of elements the cache can hold. /// iterable: Initial data to populate the cache. /// capacity: Pre-allocate hash table capacity to minimize reallocations. Defaults to 0. /// getsizeof: A callable that computes the size of a key-value pair. When `None`, each diff --git a/src/pyclasses/fifocache.rs b/src/pyclasses/fifocache.rs index 33777b7..2a7f1c1 100644 --- a/src/pyclasses/fifocache.rs +++ b/src/pyclasses/fifocache.rs @@ -1,6 +1,7 @@ use crate::internal::alias; use crate::internal::onceinit; use crate::internal::utils; +use crate::policies::common::RawVecDequeIter; use crate::policies::fifopolicy; use crate::policies::traits::HandleExt; use crate::policies::traits::PolicyExt; @@ -73,7 +74,7 @@ impl PyFIFOCache { /// Initialize a new `FIFOCache` instance. /// /// Args: - /// maxsize: Maximum number of elements the cache can hold. Zero means unlimited. + /// maxsize: Maximum number of elements the cache can hold. /// iterable: Initial data to populate the cache. /// capacity: Pre-allocate capacity to minimize reallocations. Defaults to 0. /// getsizeof: A callable that computes the size of a key-value pair. 
When `None`, each @@ -574,7 +575,11 @@ impl PyFIFOCache { } #[pyo3(signature = (n=0))] - fn first(&self, py: pyo3::Python, mut n: pyo3::ffi::Py_ssize_t) -> Option { + fn first( + &self, + py: pyo3::Python, + mut n: pyo3::ffi::Py_ssize_t, + ) -> pyo3::PyResult { let inner = self.0.get(); let policy = inner.policy(); @@ -582,18 +587,22 @@ impl PyFIFOCache { n = (policy.vecdeque().len() as isize) + n; } if n < 0 { - return None; + return Err(new_py_error!(PyIndexError, "`n` out of range")); } - let handle = policy.vecdeque().get(n as usize)?; - Some(handle.key().as_ref().clone_ref(py)) + match policy.vecdeque().get(n as usize) { + Some(handle) => Ok(handle.key().as_ref().clone_ref(py)), + None => Err(new_py_error!(PyIndexError, "`n` out of range")), + } } - fn last(&self, py: pyo3::Python) -> Option { + fn last(&self, py: pyo3::Python) -> pyo3::PyResult { let inner = self.0.get(); let policy = inner.policy(); - let handle = policy.vecdeque().back()?; - Some(handle.key().as_ref().clone_ref(py)) + match policy.vecdeque().back() { + Some(handle) => Ok(handle.key().as_ref().clone_ref(py)), + None => Err(new_py_error!(PyIndexError, "`n` out of range")), + } } fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { @@ -628,7 +637,7 @@ macro_rules! implement_iterator { $name as $pyname { initial_gv: u32, gv: utils::GenerationVersion, - iter: parking_lot::Mutex>, + iter: parking_lot::Mutex>, } } diff --git a/src/pyclasses/lrucache.rs b/src/pyclasses/lrucache.rs new file mode 100644 index 0000000..b10be0c --- /dev/null +++ b/src/pyclasses/lrucache.rs @@ -0,0 +1,733 @@ +use crate::internal::alias; +use crate::internal::linked_list; +use crate::internal::onceinit; +use crate::internal::utils; +use crate::policies::lrupolicy; +use crate::policies::traits::HandleExt; +use crate::policies::traits::PolicyExt; +use crate::policies::traits::SharedExt; +use crate::policies::wrapped::Wrapped; + +implement_pyclass! 
{ + /// A Least-Recently-Used (LRU) cache eviction policy: when the cache is full, + /// the item that has not been accessed for the longest time is removed first, + /// regardless of how many times it was accessed in the past. + /// + /// ## How It Works + /// The LRU algorithm is one of the most widely used cache eviction strategies in + /// practice. Items are tracked by their access recency—every time an item is read + /// or written, it becomes the most recently used. When the cache reaches capacity, + /// the least recently used item (the one that was accessed longest ago) is + /// evicted to make room for new entries. + /// + /// This implementation pairs a doubly-linked list with a hash map. The linked list + /// maintains items in access order: the most recently used item sits at the back, + /// and the least recently used at the front. The hash map stores pointers (cursors) + /// into this list, enabling O(1) key lookups. On every access—read or write—the + /// accessed item is moved to the back of the list, promoting it to "most recently used" + /// status. When eviction is needed, the front item is removed. + /// + /// The doubly-linked list structure is critical: it permits O(1) removal and + /// reinsertion of any item anywhere in the ordering, without requiring a full rebuild + /// or index shifting. A running total tracks the current size of cached items, + /// allowing capacity checks in constant time. + /// + /// ### Pros + /// - **Excellent hit rates on temporal locality.** Workloads where recently or + /// frequently accessed items are likely to be needed again soon benefit dramatically + /// from LRU's recency-aware eviction. Real-world caches (CPU L1/L2, database + /// buffers, CDN edges) rely on this principle. + /// - **Insert, lookup, and evict are all O(1) amortized.** The doubly-linked list + /// and hash map combination guarantees no per-operation index shifting or traversals. 
+ /// - **Automatic adaptation to access patterns.** Hot keys naturally migrate to the + /// back of the list and stay there, while cold keys drift toward eviction. No + /// manual tuning of weights or thresholds is needed. + /// - **Per-hit cost is minimal.** While LRU does require bookkeeping on reads (moving + /// an item to the back), this bookkeeping is O(1) and adds negligible overhead to most + /// workloads. + /// + /// ### Cons + /// - **Per-read overhead.** Every cache hit requires updating the linked list (removing + /// the item from its current position and reinserting it at the back), which is + /// measurably slower than FIFO's zero-cost hits on read-heavy workloads. + /// - **Burst traffic can skew eviction.** A single item accessed many times in rapid + /// succession will be kept alive indefinitely, even if other keys have better long-term + /// utility. Recency is a proxy for future use, not a guarantee. + /// - **Implementation complexity.** The doubly-linked list and cursor-based hash table add + /// internal complexity compared to simpler policies like FIFO. + /// - **Memory overhead.** Storing doubly-linked pointers (prev/next) for every cached item + /// consumes extra memory compared to array-based alternatives. + /// + /// ## When to use it + /// Reach for `LRUCache` when: + /// - Your workload exhibits temporal locality—recently accessed items are likely to be + /// needed again soon. Databases, web caches, and CPU caches all exhibit this pattern. + /// - Hit rate is your primary metric. If maximizing the proportion of requests served + /// from the cache matters more than minimizing per-hit latency, LRU is typically the + /// best general-purpose choice. + /// - Access patterns are unknown or unpredictable. LRU's automatic adaptation makes it a safe + /// default when you cannot statically analyze what keys will be hot. + /// - You need a standard, battle-tested algorithm.
LRU is the de facto eviction policy in most + /// production systems; it is well-understood, widely supported, and easy to reason about. + /// + /// Avoid it when: + /// - Your workload is write-heavy with few or no re-reads. FIFO's zero per-hit bookkeeping + /// will outperform LRU if the cache is rarely hit. + /// - You need sub-microsecond latency on every operation. The linked-list manipulation on each + /// read can add measurable overhead in ultra-low-latency systems. + /// - Access patterns are bimodal or exhibit frequency-heavy behavior (a small set of items is + /// accessed far more often than others). An LFU policy may deliver better hit rates in such cases. + [subclass, extends=crate::pyclasses::base::PyBaseCacheImpl, generic, frozen] + PyLRUCache as "LRUCache" (onceinit::OnceInit>); +} + +#[pyo3::pymethods] +impl PyLRUCache { + #[new] + #[allow(unused_variables)] + #[pyo3(signature=(*args, **kwds))] + fn __new__( + args: alias::ArgsType, + kwds: Option, + ) -> (Self, crate::pyclasses::base::PyBaseCacheImpl) { + ( + Self(onceinit::OnceInit::uninit()), + crate::pyclasses::base::PyBaseCacheImpl, + ) + } + + /// Initialize a new `LRUCache` instance. + /// + /// Args: + /// maxsize: Maximum number of elements the cache can hold. + /// iterable: Initial data to populate the cache. + /// capacity: Pre-allocate capacity to minimize reallocations. Defaults to 0. + /// getsizeof: A callable that computes the size of a key-value pair. When `None`, each + /// entry is assumed to have a size of 1 (equivalent to `lambda k, v: 1`). + /// Use this to implement weighted caching — for example, sizing entries by + /// memory footprint or byte length. + /// + /// The cache can be pre-sized via `capacity` to reduce hash table reallocations when + /// the number of expected entries is known ahead of time.
+ #[pyo3(signature=(maxsize, iterable=None, *, capacity=0, getsizeof=None))] + fn __init__( + &self, + py: pyo3::Python, + maxsize: usize, + iterable: Option, + capacity: usize, + getsizeof: Option, + ) -> pyo3::PyResult<()> { + let wrapped = Wrapped::new( + lrupolicy::LRUPolicy::new(capacity), + lrupolicy::Shared::new(maxsize, getsizeof), + ); + + if let Some(iterable) = iterable { + let getsizeof = wrapped.shared().getsizeof().clone_ref(py); + + let result = wrapped.extend( + // iterable object + iterable, + // transform function + |key, value| lrupolicy::Handle::new(py, &getsizeof, key, value), + ); + self.0.set(wrapped); + result + } else { + self.0.set(wrapped); + Ok(()) + } + } + + #[getter] + #[inline] + fn maxsize(&self) -> usize { + let inner = self.0.get(); + inner.shared().maxsize() + } + + #[inline] + fn current_size(&self) -> usize { + let inner = self.0.get(); + inner.policy().current_size() + } + + #[inline] + fn remaining_size(&self) -> usize { + let inner = self.0.get(); + inner.remaining_size() + } + + #[getter] + #[inline] + fn getsizeof(&self, py: pyo3::Python) -> Option { + let inner = self.0.get(); + inner.shared().getsizeof().clone_ref(py).into() + } + + /// Returns the number of elements the map can hold without reallocating. + #[inline] + fn capacity(&self) -> usize { + let inner = self.0.get(); + let policy = inner.policy(); + + policy.table().capacity() + } + + /// Returns the number of entries currently in the cache. 
+ #[inline] + fn __len__(&self) -> usize { + let inner = self.0.get(); + let policy = inner.policy(); + + debug_assert!(policy.table().len() == policy.linked_list().len()); + policy.table().len() + } + + #[inline] + fn __sizeof__(&self) -> usize { + let inner = self.0.get(); + let policy = inner.policy(); + + let table_cap = policy.table().capacity() * 8; + let list_cap = policy.linked_list().len() * std::mem::size_of::(); + + table_cap + list_cap + } + + #[inline] + fn __bool__(&self) -> bool { + let inner = self.0.get(); + let policy = inner.policy(); + + !policy.table().is_empty() + } + + #[inline] + fn __contains__(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult { + self.contains(py, key) + } + + /// Returns `true` if the cache contains an entry for `key`. + #[inline] + fn contains(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + let inner = self.0.get(); + inner.contains(py, &key) + } + + /// Returns `True` if cache is empty. + #[inline] + fn is_empty(&self) -> bool { + let inner = self.0.get(); + let policy = inner.policy(); + + policy.table().is_empty() + } + + /// Returns `True` when the cumulative size has reached the maxsize limit. + #[inline] + fn is_full(&self) -> bool { + let inner = self.0.get(); + let shared = inner.shared(); + let policy = inner.policy(); + + policy.current_size() >= shared.maxsize() + } + + /// Equals to `self[key] = value`, but returns a value: + /// + /// - If the cache did not have this key present, None is returned. + /// - If the cache did have this key present, the value is updated, + /// and the old value is returned. The key is not updated, though. 
+ fn insert( + &self, + py: pyo3::Python, + key: alias::PyObject, + value: alias::PyObject, + ) -> pyo3::PyResult> { + let inner = self.0.get(); + let handle = lrupolicy::Handle::new(py, inner.shared().getsizeof(), key, value)?; + + let old_handle = inner.insert(py, handle)?.map(|x| x.into_value()); + Ok(old_handle) + } + + /// Updates the cache with elements from a dictionary or an iterable object of key/value pairs. + fn update( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + iterable: alias::PyObject, + ) -> pyo3::PyResult<()> { + if std::ptr::eq(slf.as_ptr(), iterable.as_ptr()) { + return Ok(()); + } + + let inner = slf.0.get(); + let getsizeof = inner.shared().getsizeof().clone_ref(py); + + inner.extend( + // iterable object + iterable.into_bound(py), + // transform function + move |key, value| lrupolicy::Handle::new(py, &getsizeof, key, value), + ) + } + + #[inline] + fn __setitem__( + &self, + py: pyo3::Python, + key: alias::PyObject, + value: alias::PyObject, + ) -> pyo3::PyResult<()> { + self.insert(py, key, value)?; + Ok(()) + } + + /// Retrieves the value for a given key from the cache. + /// + /// Returns the value associated with the key if present, otherwise returns the specified default value. + /// Equivalent to `self[key]`, but provides a fallback default if the key is not found. + /// + /// Args: + /// key: The key to look up in the cache. + /// default: The value to return if the key is not present in the cache. Defaults to None. + /// + /// Returns: + /// The value associated with the key, or the default value if the key is not found. + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn get<'p>( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument<'p>, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let mut policy = inner.policy(); + + if let Some(x) = policy.get(py, &key)? 
{ + return Ok(x.value().clone_ref(py)); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok(x.unbind()), + utils::OptionalArgument::Undefined => unsafe { + // SAFETY: None is immortal, so reference counting has no meaning + Ok(pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind()) + }, + } + } + + fn __getitem__( + &self, + py: pyo3::Python, + key: alias::PyObject, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let mut policy = inner.policy(); + + match policy.get(py, &key)? { + Some(x) => Ok(x.value().clone_ref(py)), + None => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + /// Inserts key with a value of default if key is not in the cache. + /// + /// Returns the value for key if key is in the cache, else default. + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn setdefault( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult { + // 1. Try to get value + // 2. If exists -> return it + // 3. Else -> insert default -> return default + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let shared = inner.shared(); + let mut policy = inner.policy(); + + if let Some(x) = policy.get(py, &key)? { + return Ok(x.value().clone_ref(py)); + } + drop(policy); + + let default_object = match default { + utils::OptionalArgument::Defined(x) => x.unbind(), + utils::OptionalArgument::Undefined => unsafe { + // SAFETY: None is immortal, so reference counting has no meaning + pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind() + }, + }; + + let handle = lrupolicy::Handle::with_precomputed_hash_key( + py, + shared.getsizeof(), + key, + default_object.clone_ref(py), + )?; + + inner.insert(py, handle)?; + Ok(default_object) + } + + /// Removes specified key and returns the corresponding value. 
+ /// + /// If the key is not found, returns the `default` if given; otherwise, raise a KeyError. + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn pop( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + + if let Some(x) = inner.remove(py, &key)? { + return Ok(x.into_value()); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok(x.unbind()), + utils::OptionalArgument::Undefined => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + fn __delitem__(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult<()> { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + match inner.remove(py, &key)? { + Some(_) => Ok(()), + None => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + /// Remove and return a (key, value) pair as a 2-tuple. + fn popitem(&self, py: pyo3::Python) -> pyo3::PyResult<(alias::PyObject, alias::PyObject)> { + let inner = self.0.get(); + let mut policy = inner.policy(); + + let handle = policy.evict(py, inner.shared())?; + drop(policy); + + let (key, val) = handle.into_pair(); + Ok((key.into(), val)) + } + + /// Calls the `popitem()` `n` times and returns count of removed items. + #[inline] + fn drain( + &self, + py: pyo3::Python, + n: pyo3::ffi::Py_ssize_t, + ) -> pyo3::PyResult { + let inner = self.0.get(); + inner.drain(py, n) + } + + /// Shrinks the internal allocation as close to the current length as possible. + #[inline] + fn shrink_to_fit(&self) { + let inner = self.0.get(); + let mut policy = inner.policy(); + policy.shrink_to_fit(inner.shared()); + } + + /// Removes all entries from the table and resets the cumulative size to zero. 
+ #[pyo3(signature=(*, reuse=false))] + fn clear(&self, reuse: bool) { + let inner = self.0.get(); + let shared = inner.shared(); + let mut policy = inner.policy(); + + policy.clear(shared); + + if !reuse { + policy.shrink_to_fit(shared); + } + } + + fn __eq__( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + other: pyo3::PyRef<'_, Self>, + ) -> pyo3::PyResult { + if std::ptr::eq(slf.as_ptr(), other.as_ptr()) { + return Ok(true); + } + + let self_inner = slf.0.get(); + let other_inner = other.0.get(); + + let self_policy = self_inner.policy(); + let other_policy = other_inner.policy(); + + self_policy.py_eq( + py, + self_inner.shared(), + &*other_policy, + other_inner.shared(), + ) + } + + fn __ne__( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + other: pyo3::PyRef<'_, Self>, + ) -> pyo3::PyResult { + if std::ptr::eq(slf.as_ptr(), other.as_ptr()) { + return Ok(false); + } + + let self_inner = slf.0.get(); + let other_inner = other.0.get(); + + let self_policy = self_inner.policy(); + let other_policy = other_inner.policy(); + + self_policy + .py_eq( + py, + self_inner.shared(), + &*other_policy, + other_inner.shared(), + ) + .map(|x| !x) + } + + fn items(&self, py: pyo3::Python) -> pyo3::PyResult> { + let inner = self.0.get(); + let gv = inner.shared().generation_version().clone(); + let initial_gv = gv.get(); + + // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] + let result = PyLRUCacheItems { + iter: parking_lot::Mutex::new(unsafe { inner.policy().linked_list().iter() }), + gv, + initial_gv, + }; + pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + } + + fn values(&self, py: pyo3::Python) -> pyo3::PyResult> { + let inner = self.0.get(); + let gv = inner.shared().generation_version().clone(); + let initial_gv = gv.get(); + + // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] + let result = PyLRUCacheValues { + iter: 
parking_lot::Mutex::new(unsafe { inner.policy().linked_list().iter() }), + gv, + initial_gv, + }; + pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + } + + fn keys(&self, py: pyo3::Python) -> pyo3::PyResult> { + let inner = self.0.get(); + let gv = inner.shared().generation_version().clone(); + let initial_gv = gv.get(); + + // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] + let result = PyLRUCacheKeys { + iter: parking_lot::Mutex::new(unsafe { inner.policy().linked_list().iter() }), + gv, + initial_gv, + }; + pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + } + + #[inline] + fn __iter__(&self, py: pyo3::Python) -> pyo3::PyResult> { + self.keys(py) + } + + fn copy(&self, py: pyo3::Python) -> pyo3::PyResult> { + let inner = self.0.get(); + let cloned = inner.clone_ref(py); + let result = Self(onceinit::OnceInit::new(cloned)); + + pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseCacheImpl)) + } + + #[inline] + fn __copy__(&self, py: pyo3::Python) -> pyo3::PyResult> { + self.copy(py) + } + + fn __repr__(slf: pyo3::PyRef<'_, Self>, py: pyo3::Python) -> String { + let inner = slf.0.get(); + let shared = inner.shared(); + let policy = inner.policy(); + + let iter = unsafe { + policy.linked_list().iter().map(|cursor| { + let handle = cursor.element(); + ( + // Without `.bind` it returns something like `Py(addr)` + handle.key().as_ref().bind(py), + handle.value().bind(py), + ) + }) + }; + + let items = utils::items_to_str(iter, policy.table().len()).unwrap(); + format!( + "{}[{}/{}]({})", + unsafe { utils::get_type_name(py, slf.as_ptr()) }, + policy.current_size(), + shared.maxsize(), + items + ) + } + + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn peek<'p>( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument<'p>, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let 
inner = self.0.get(); + let policy = inner.policy(); + + if let Some(x) = policy.peek(py, &key)? { + return Ok(x.value().clone_ref(py)); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok(x.unbind()), + utils::OptionalArgument::Undefined => unsafe { + // SAFETY: None is immortal, so reference counting has no meaning + Ok(pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind()) + }, + } + } + + #[inline] + fn least_recently_used(&self, py: pyo3::Python) -> pyo3::PyResult { + let inner = self.0.get(); + let policy = inner.policy(); + + match policy.linked_list().cursor_front() { + Some(cursor) => Ok(unsafe { cursor.element().key().clone_ref(py).into() }), + None => Err(new_py_error!(PyKeyError, "cache is empty")), + } + } + + #[inline] + fn most_recently_used(&self, py: pyo3::Python) -> pyo3::PyResult { + let inner = self.0.get(); + let policy = inner.policy(); + + match policy.linked_list().cursor_back() { + Some(cursor) => Ok(unsafe { cursor.element().key().clone_ref(py).into() }), + None => Err(new_py_error!(PyKeyError, "cache is empty")), + } + } + + fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { + let inner = self.0.get(); + let policy = inner.policy(); + + for cursor in unsafe { policy.linked_list().iter() } { + let handle = unsafe { cursor.element() }; + + visit.call(handle.key().as_ref())?; + visit.call(handle.value())?; + } + Ok(()) + } + + fn __clear__(&self) { + let inner = self.0.get(); + let mut policy = inner.policy(); + policy.clear(inner.shared()); + } +} + +// Implement iterators +macro_rules! implement_iterator { + ( + $( + $name:ident as $pyname:literal + fn ($py:ident, $handle:ident) -> $rt_type:ty { $init:expr } + )+ + ) => { + $( + implement_pyclass! 
{ + [extends=crate::pyclasses::base::PyBaseIteratorImpl, generic, frozen] + $name as $pyname { + initial_gv: u32, + gv: utils::GenerationVersion, + iter: parking_lot::Mutex>, + } + } + + #[pyo3::pymethods] + impl $name { + #[inline] + fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { + slf + } + + fn __next__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult<$rt_type> { + if slf.initial_gv != slf.gv.get() { + return Err(new_py_error!( + PyRuntimeError, + "cache size changed during iteration" + )); + } + + let mut iter = slf.iter.lock(); + + match iter.next() { + Some(x) => { + let $py = slf.py(); + let $handle = unsafe { x.element() }; + Ok($init) + } + None => return Err(new_py_error!(PyStopIteration, ())), + } + } + } + )+ + }; +} +implement_iterator!( + PyLRUCacheItems as "lrucache_items" + fn(py, handle) -> (alias::PyObject, alias::PyObject) {{ + let (key, val) = handle.clone_ref(py).into_pair(); + (key.into(), val) + }} + + PyLRUCacheKeys as "lrucache_keys" + fn(py, handle) -> alias::PyObject { handle.key().clone_ref(py).into() } + + PyLRUCacheValues as "lrucache_values" + fn(py, handle) -> alias::PyObject { handle.value().clone_ref(py) } +); diff --git a/src/pyclasses/mod.rs b/src/pyclasses/mod.rs index b7e7792..3d16ad8 100644 --- a/src/pyclasses/mod.rs +++ b/src/pyclasses/mod.rs @@ -1,4 +1,5 @@ pub mod base; pub mod cache; pub mod fifocache; +pub mod lrucache; pub mod rrcache; diff --git a/src/pyclasses/rrcache.rs b/src/pyclasses/rrcache.rs index 5ce54a4..e6149b2 100644 --- a/src/pyclasses/rrcache.rs +++ b/src/pyclasses/rrcache.rs @@ -8,6 +8,47 @@ use crate::policies::traits::SharedExt; use crate::policies::wrapped::Wrapped; implement_pyclass! { + /// A thread-safe, memory-efficient key-value cache with Random Replacement eviction policy. + /// When the cache reaches its maximum size, an item is randomly selected and + /// evicted to make room for new entries. 
+ /// + /// ## How It Works + /// `RRCache` is a configurable hashmap-like store with automatic eviction. When an item is inserted: + /// - It is stored directly without any ordering or priority tracking. + /// - If a maximum size is configured and the cache is full, a random entry is evicted to make room + /// for the new item. + /// - All read and write operations are thread-safe, making it safe for concurrent access without + /// external locking. + /// + /// The Random Replacement policy selects entries for eviction uniformly at random, ensuring fair + /// treatment across all cached items regardless of access patterns. + /// + /// ### Pros + /// - Low overhead: Random Replacement is computationally cheap compared to tracking access order or frequency. + /// - Thread-safe: safe for concurrent reads and writes out of the box. + /// - Configurable capacity: a hard size limit prevents unbounded memory growth while allowing new entries + /// through automatic eviction. + /// - No staleness issues: items persist only as long as they remain unselected by the eviction policy, + /// preventing indefinite accumulation of stale data. + /// + /// ### Cons + /// - Non-deterministic eviction: random selection means you cannot predict which entry will be removed, + /// potentially evicting recently cached or frequently accessed items. + /// - Unordered: insertion order is not preserved. + /// - Less optimal than LRU/LFU: for workloads with skewed access patterns, Random Replacement will + /// evict frequently used items more often than policy-aware caches. + /// + /// ## When to Use It + /// `RRCache` is the right choice when: + /// - You have a working set that can grow unpredictably and requires automatic memory management. + /// - Access patterns are relatively uniform and predictable, so random eviction is not significantly + /// worse than smarter policies. + /// - You need low computational overhead and simple eviction logic. 
+ /// - You want to prevent unbounded memory growth without the complexity of tracking usage metadata. + /// + /// Avoid it when you have highly skewed access patterns (where certain items are accessed far more + /// frequently than others), when cache hits are mission-critical and predictability matters, or when + /// you need fine-grained control over what gets evicted. [subclass, extends=crate::pyclasses::base::PyBaseCacheImpl, generic, frozen] PyRRCache as "RRCache" (onceinit::OnceInit>); } @@ -30,7 +71,7 @@ impl PyRRCache { /// Initialize a new `RRCache` instance. /// /// Args: - /// maxsize: Maximum number of elements the cache can hold. Zero means unlimited. + /// maxsize: Maximum number of elements the cache can hold. /// iterable: Initial data to populate the cache. /// capacity: Pre-allocate hash table capacity to minimize reallocations. Defaults to 0. /// getsizeof: A callable that computes the size of a key-value pair. When `None`, each diff --git a/tests/test_impls.py b/tests/test_impls.py index c32653e..d0893e0 100644 --- a/tests/test_impls.py +++ b/tests/test_impls.py @@ -234,9 +234,11 @@ def test_first_on_single_element_cache(self): assert cache.first() == 42 assert cache.last() == 42 - def test_first_returns_none_on_empty_cache(self): + def test_first_raise_indexerror_on_empty_cache(self): cache = self.create_cache(0) - assert cache.first() is None + + with pytest.raises(IndexError): + cache.first() def test_rolling_window_maintains_correct_contents(self): """ @@ -303,7 +305,7 @@ def test_clear_resets_fifo_order(self): @pytest.mark.skipif( not hasattr(cachebox, "_fifocache_small_offset"), - reason="requires fifocache-small-offset feature flag", + reason="requires small-offset feature flag", ) def test_edge_case_of_front_offset_overflow(self): """ @@ -405,3 +407,232 @@ def test_random_key_method(self): cache["c"] = 3 cache["d"] = 4 assert cache.random_key() in ("a", "b", "c", "d") + + +class TestLRUCache( + mixins.InitializeMixin, + 
mixins.InsertAndGetMixin, + mixins.PopitemMixin, + mixins.SetDefaultMixin, + mixins.PopAndDeleteMixin, + mixins.UpdateMixin, + mixins.IntrospectionMixin, + mixins.IterationMixin, + mixins.DrainClearShrinkMixin, + mixins.CopyMixin, + mixins.GetSizeOfMixin, + mixins.EdgeCasesMixin, + mixins.IssuesMixin, + mixins.FuzzyMixin, +): + def create_cache( + self, + maxsize: int = 10, + iterable: typing.Any = None, + capacity: int = 0, + getsizeof: typing.Any = None, + ) -> cachebox.LRUCache: + return cachebox.LRUCache( + maxsize, + iterable, + capacity=capacity, + getsizeof=getsizeof, + ) + + +class TestLRUCachePolicy(mixins.BaseMixin): + def create_cache( + self, + maxsize: int = 10, + iterable: typing.Any = None, + capacity: int = 0, + getsizeof: typing.Any = None, + ) -> cachebox.LRUCache: + return cachebox.LRUCache( + maxsize, + iterable, + capacity=capacity, + getsizeof=getsizeof, + ) + + def test_evicts_lru_when_full(self): + c = self.create_cache(3, {"a": 1, "b": 2, "c": 3}) + c.insert("d", 4) + assert "a" not in c + assert "d" in c + + c = self.create_cache(3, {"a": 1, "b": 2, "c": 3}) + c.insert("a", 1) + c.insert("b", 2) + c.insert("c", 3) + c.insert("d", 4) + assert "a" not in c + assert "d" in c + + def test_does_not_evict_recently_read_key(self): + c = self.create_cache(3) + c.insert("a", 1) + c.insert("b", 2) + c.insert("c", 3) + _ = c["a"] # promote "a" → "b" becomes LRU + c.insert("d", 4) + assert "b" not in c + assert "a" in c + + def test_reinserting_existing_key_promotes_it(self): + c = self.create_cache(3, [("a", 1), ("b", 2), ("c", 3)]) + c.insert("a", 99) # "a" was LRU, now MRU + c.insert("d", 4) # should evict "b", not "a" + assert "a" in c + assert "b" not in c + + def test_cache_never_exceeds_maxsize(self): + c = self.create_cache(5) + for i in range(20): + c.insert(i, i) + assert len(c) <= 5 + + def test_sequential_inserts_keep_only_latest(self): + c = self.create_cache(3) + for i in range(6): + c.insert(i, i) + + for k in range(3): + assert k not 
in c + + for k in range(3, 6): + assert k in c + + def test_update_evicts_lru_to_make_room(self): + c = self.create_cache(3) + c.insert("a", 1) + c.insert("b", 2) + c.insert("c", 3) + c.update({"d": 4}) + assert "a" not in c + + def test_update_existing_key_promotes_it(self): + c = self.create_cache(3, [("a", 1), ("b", 2), ("c", 3)]) + c.update({"a": 99}) # "a" was LRU, now MRU + c.update({"d": 4}) # should evict "b" + assert "a" in c + assert "b" not in c + + def test_lru_and_mru_key_methods(self): + c = self.create_cache(3) + c.insert("a", 1) + + assert c.least_recently_used() == "a" + assert c.most_recently_used() == "a" + + c.insert("b", 2) + c.insert("c", 3) + + assert c.least_recently_used() == "a" + assert c.most_recently_used() == "c" + + _ = c["a"] # promote "a" + + assert c.least_recently_used() == "b" + assert c.most_recently_used() == "a" + + assert "b" in c # promote "b" + + assert c.least_recently_used() == "c" + assert c.most_recently_used() == "b" + + def test_setdefault_on_existing_key_promotes_it(self): + c = self.create_cache(0, [("a", 1), ("b", 2), ("c", 3)]) + c.setdefault("a", 0) + assert c.most_recently_used() == "a" + + def test_lru_mru_empty_raises(self): + with pytest.raises(KeyError): + self.create_cache(5).least_recently_used() + + with pytest.raises(KeyError): + self.create_cache(5).most_recently_used() + + def test_removes_least_recently_used(self): + c = self.create_cache(0, [("a", 1), ("b", 2), ("c", 3)]) + key, val = c.popitem() + assert key == "a" + assert val == 1 + assert "a" not in c + + def test_order_after_read(self): + c = self.create_cache(0, [("a", 1), ("b", 2), ("c", 3)]) + _ = c["a"] # "a" now MRU → "b" is LRU + key, _ = c.popitem() + assert key == "b" + + def test_order_after_reinsert(self): + c = self.create_cache(0, [("a", 1), ("b", 2), ("c", 3)]) + c.insert("a", 99) # "a" now MRU → "b" is LRU + key, _ = c.popitem() + assert key == "b" + + def test_repeated_popitem_respects_lru_order(self): + c = self.create_cache(5) + 
for i in range(5): + c.insert(i, i * 10) + + for expected in range(5): + key, _ = c.popitem() + assert key == expected + + def test_empty_raises(self): + with pytest.raises(KeyError): + self.create_cache(5).popitem() + + def test_hot_key_never_evicted(self): + c = self.create_cache(3) + c.insert("hot", 0) + for i in range(20): + _ = c.get("hot") + c.insert(f"cold_{i}", i) + + assert "hot" in c + + def test_mixed_reads_and_writes_evict_correctly(self): + c = self.create_cache(4) + c.insert("a", 1) + c.insert("b", 2) + c.insert("c", 3) + c.insert("d", 4) + _ = c["a"] # order: b, c, d, a + _ = c["c"] # order: b, d, a, c + c.insert("e", 5) # evicts "b" + assert "b" not in c + c.insert("f", 6) # evicts "d" + assert "d" not in c + + def test_peek_existing_key(self): + cache = self.create_cache() + + cache.insert("k", 42) + assert cache.peek("k") == 42 + + def test_peek_missing_key_returns_none(self): + cache = self.create_cache() + + assert cache.peek("nope") is None + + def test_peek_missing_key_returns_custom_default(self): + cache = self.create_cache() + + assert cache.peek("nope", "fallback") == "fallback" + + def test_peek_no_promote_key(self): + c = self.create_cache(3) + c.insert("a", 1) + c.insert("b", 2) + c.insert("c", 3) + + assert c.least_recently_used() == "a" + assert c.most_recently_used() == "c" + + c.peek("a") + + assert c.least_recently_used() == "a" + assert c.most_recently_used() == "c" From dd6ecb28194bec6f2a2311aa08646f110b06e17d Mon Sep 17 00:00:00 2001 From: awolverp Date: Sun, 24 May 2026 12:54:36 +0330 Subject: [PATCH 15/60] Refactor LFU --- cachebox/__init__.py | 1 + cachebox/_core.pyi | 168 +++++++- src/internal/lazyheap.rs | 350 ++++++++++++++++ src/internal/linked_list.rs | 9 +- src/internal/mod.rs | 1 + src/internal/utils.rs | 79 ++++ src/lib.rs | 12 + src/policies/common.rs | 79 ---- src/policies/fifopolicy.rs | 28 +- src/policies/lfupolicy.rs | 477 ++++++++++++++++++++++ src/policies/lrupolicy.rs | 45 +-- src/policies/mod.rs | 1 + 
src/policies/nopolicy.rs | 8 +- src/policies/rrpolicy.rs | 8 +- src/policies/traits.rs | 10 +- src/pyclasses/fifocache.rs | 25 +- src/pyclasses/lfucache.rs | 773 ++++++++++++++++++++++++++++++++++++ src/pyclasses/lrucache.rs | 20 +- src/pyclasses/mod.rs | 1 + tests/test_impls.py | 31 ++ 20 files changed, 1951 insertions(+), 175 deletions(-) create mode 100644 src/internal/lazyheap.rs create mode 100644 src/policies/lfupolicy.rs create mode 100644 src/pyclasses/lfucache.rs diff --git a/cachebox/__init__.py b/cachebox/__init__.py index e50ab6f..bcd3b11 100644 --- a/cachebox/__init__.py +++ b/cachebox/__init__.py @@ -1,6 +1,7 @@ from ._core import BaseCacheImpl as BaseCacheImpl from ._core import Cache as Cache from ._core import FIFOCache as FIFOCache +from ._core import LFUCache as LFUCache from ._core import LRUCache as LRUCache from ._core import RRCache as RRCache diff --git a/cachebox/_core.pyi b/cachebox/_core.pyi index 81ae087..8b8376c 100644 --- a/cachebox/_core.pyi +++ b/cachebox/_core.pyi @@ -694,9 +694,6 @@ class LRUCache(BaseCacheImpl[KT, VT]): ) -> typing.Union[VT, DT]: """ Retrieves the value for a given key from the cache (without promoting the key). - - Returns the value associated with the key if present, otherwise returns the specified default value. - Equivalent to `self[key]`, but provides a fallback default if the key is not found. """ def least_recently_used(self) -> typing.Optional[KT]: @@ -714,3 +711,168 @@ class LRUCache(BaseCacheImpl[KT, VT]): Raises `KeyError` if cache is empty. """ ... + +class LFUCache(BaseCacheImpl[KT, VT]): + """ + A Least-Frequently-Used (LFU) cache eviction policy: when the cache is full, the item + with the lowest access count is evicted first. Ties in frequency are broken by recency - + among equally rare items, the oldest is evicted. + + ## How It Works + The LFU algorithm tracks how many times each cached item has been accessed, and always + evicts the item with the smallest count. 
This makes it well-suited for workloads where + some items are structurally "hot" and where that frequency signal is stable enough to + be worth preserving across cache pressure events. + + This implementation uses a `lazy binary min-heap` keyed on access frequency, paired with + a `hash map` that maps each key to its cursor (a stable pointer into the heap's backing + buffer). The heap is "lazy" in the sense that it does not restore the heap invariant after + every frequency increment; instead it sets a dirty flag and defers the full re-sort until + the next eviction. This amortises the cost of heap maintenance across many hits, so + read-heavy workloads pay far less per operation than a classic eager heap would require. + + On a cache hit, the item's frequency counter is incremented in O(1) and the heap is marked + dirty. On eviction, the heap is sorted if dirty, and the minimum-frequency item is popped + in O(n log n) worst-case (amortised O(log n) under typical access distributions). Lookups + are O(1) via the hash map. + + ### Pros + - Frequency-aware eviction. Items that are accessed often are protected from eviction even + under heavy cache pressure, leading to higher hit rates on skewed workloads. + - O(1) cache hits. Incrementing a counter and marking the heap dirty is constant-time work, + with no structural reorganisation on the hot path. + - Lazy heap sorting amortises O(n log n) sort cost across many inserts and hits, keeping + the average cost per operation much lower than a naive eager implementation. + + ### Cons + - Eviction is O(n log n) worst-case. If the heap is maximally dirty (every entry modified + since last sort), a single eviction triggers a full re-sort over all entries. This is + amortised away in practice but introduces latency spikes under adversarial access patterns. + - Frequency counters accumulate indefinitely. 
A key that was hot during an early burst remains + privileged long after traffic shifts, causing "cache pollution" - stale items that monopolise + capacity because of historical frequency, not current utility. + - Access patterns must be skewed for LFU to outperform simpler policies. On uniform workloads, + frequency counters provide no signal and the extra bookkeeping is pure overhead. + + ## When to use it + Reach for `LFUCache` when: + - Your workload has a stable hot set: a minority of keys that are accessed disproportionately + often and whose relative popularity changes slowly over time. + - Cache pollution from one-time scans is a concern: LFU naturally resists large sequential reads + from displacing frequently accessed items, because freshly inserted keys start at count 1 and + are evicted before any item with accumulated hits. + - Hit rate matters more than worst-case eviction latency: the amortised cost is low, but if your + system has hard real-time latency requirements, the occasional sort spike may be unacceptable. + + Avoid it when access patterns shift rapidly. If the "hot" subset of keys changes frequently, + frequency counters become stale signals and LFU will evict items that have recently become + popular. In those cases, an LRU policy - which tracks recency rather than frequency - will + adapt faster and typically deliver better hit rates. + + Avoid it on uniform workloads where all keys are accessed with roughly equal probability. + The frequency signal provides no meaningful discrimination, and the overhead of maintaining + counters and a heap is wasted compared to the simpler bookkeeping of FIFO or LRU. + """ + + def insert(self, key: KT, value: VT) -> typing.Optional[VT]: + """ + Equals to `self[key] = value`, but returns a value: + + - If the cache did not have this key present, None is returned. + - If the cache did have this key present, the value is updated, + and the old value is returned. 
The key is not updated, though; + + It's recommended to use this method instead of `self[key] = value`, as it keeps code + compatible across different cache policies. + """ + ... + + def update(self, iterable: _IterableType[KT, VT]) -> None: + """ + Updates the cache with elements from a dictionary or an iterable object of key/value pairs. + """ + ... + + def get( + self, + key: KT, + default: typing.Optional[DT] = ..., + ) -> typing.Union[VT, DT]: + """ + Retrieves the value for a given key from the cache. + + Returns the value associated with the key if present, otherwise returns the specified default value. + Equivalent to `self[key]`, but provides a fallback default if the key is not found. + """ + ... + + def setdefault( + self, + key: KT, + default: typing.Optional[DT] = None, + ) -> typing.Optional[VT | DT]: + """ + Inserts key with a value of default if key is not in the cache. + + Returns the value for key if key is in the cache, else default. + """ + ... + + def popitem(self) -> typing.Tuple[KT, VT]: + """ + Removes the least frequently used item from the cache and returns it as a (key, value) tuple. + Raises KeyError if the cache is empty. + """ + ... + + def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: + """ + Returns an iterable object of the cache's items (key-value pairs). + + Notes: + - You should not make any changes in cache while using this iterable object. + - Items are ordered. + """ + ... + + def keys(self) -> typing.Iterable[KT]: + """ + Returns an iterable object of the cache's keys. + + Notes: + - You should not make any changes in cache while using this iterable object. + - Keys are ordered. + """ + ... + + def values(self) -> typing.Iterable[VT]: + """ + Returns an iterable object of the cache's values. + + Notes: + - You should not make any changes in cache while using this iterable object. + - Values are ordered. + """ + ... 
+ + def peek( + self, + key: KT, + default: typing.Optional[DT] = ..., + ) -> typing.Union[VT, DT]: + """ + Retrieves the value for a given key from the cache (without frequency increment). + """ + ... + + def least_frequently_used(self, n: int = 0) -> KT: + """ + Returns the key in the cache that has been accessed the least. If n is given, returns the nth least frequently used key. + + Raises `IndexError` if cache is empty, or `n` is out of range. + + Notes: + - This method may re-sort the cache which can cause iterators to be stopped. + - Do not use this method while using iterators. + """ + ... diff --git a/src/internal/lazyheap.rs b/src/internal/lazyheap.rs new file mode 100644 index 0000000..7317c1b --- /dev/null +++ b/src/internal/lazyheap.rs @@ -0,0 +1,350 @@ +use std::ptr::NonNull; + +use crate::internal::utils; + +/// A collection that defers sorting until an ordered operation is requested. +/// +/// Unlike a classic binary heap, `LazyHeap` does not maintain a heap +/// invariant after every insertion. Instead it tracks a dirty flag and +/// re-sorts the entire backing buffer the first time an ordered operation is +/// needed. This amortises well when many insertions occur before any removal, +/// because one `O(n log n)` sort is cheaper than repeated `O(log n)` sift-ups. +/// +/// # Ownership model +/// `LazyHeap` is the **sole owner** of every element it holds. Cursors are +/// purely non-owning handles and must never be used to free the backing +/// allocation. +pub struct LazyHeap { + data: std::collections::VecDeque>, + is_sorted: bool, + _marker: std::marker::PhantomData>, +} + +impl LazyHeap { + /// Pops and owns the front allocation. Does **not** sort. + #[inline] + fn unlink_front(&mut self) -> Option { + let ptr = self.data.pop_front()?; + // SAFETY: LazyHeap owns the sole Box for every pointer it stores. + Some(*unsafe { Box::from_raw(ptr.as_ptr()) }) + } + + /// Pops and owns the back allocation. Does **not** sort. 
+ #[inline] + fn unlink_back(&mut self) -> Option { + let ptr = self.data.pop_back()?; + // SAFETY: LazyHeap owns the sole Box for every pointer it stores. + Some(*unsafe { Box::from_raw(ptr.as_ptr()) }) + } +} + +impl LazyHeap { + /// Creates a new, empty `LazyHeap`. + pub fn new() -> Self { + Self { + data: std::collections::VecDeque::new(), + is_sorted: true, + _marker: std::marker::PhantomData, + } + } + + /// Returns the number of elements in the heap. + #[inline] + pub fn len(&self) -> usize { + self.data.len() + } + + /// Returns `true` if the heap contains no elements. + #[inline] + pub fn is_empty(&self) -> bool { + self.data.is_empty() + } + + /// Inserts `value` into the heap and returns a [`Cursor`] to it. + /// + /// The returned cursor is **non-owning**. Store it in an external structure + /// (e.g. a `hashbrown::RawTable`) for later removal via [`remove`](Self::remove). + /// Never reconstruct a `Box` from it. + /// + /// This call marks the heap as unsorted; the next ordered operation + /// triggers a full sort. + /// + /// # Complexity + /// Amortised O(1). + #[inline] + pub fn push(&mut self, value: T) -> Cursor { + // SAFETY: Box::into_raw is guaranteed non-null. + let ptr = unsafe { NonNull::new_unchecked(Box::into_raw(Box::new(value))) }; + self.data.push_back(ptr); + self.is_sorted = false; + Cursor(ptr) + } + + /// Marks the heap's order as invalid without re-sorting immediately. + /// + /// Call this after mutating an element's sort key through [`Cursor::element_mut`]. + /// The next ordered operation will then re-sort before proceeding. + #[inline] + pub fn mark_unsorted(&mut self) { + self.is_sorted = false; + } + + /// Sorts the backing buffer with `compare` if it is not already sorted; returns + /// `true` when a sort was performed and `false` when it was already sorted. + /// + /// All ordered operations call this automatically. You can call it + /// manually to amortise the sort cost before a batch of [`front`](Self::front) / + /// [`get`](Self::get) accesses. 
+ /// + /// # Complexity + /// O(n log n) when unsorted; O(1) when already sorted. + #[inline] + pub fn sort_by(&mut self, compare: impl Fn(&T, &T) -> std::cmp::Ordering) -> bool { + if self.is_sorted { + return false; + } + if self.data.len() > 1 { + // SAFETY: every pointer in `self.data` is a live, heap-owned allocation. + unsafe { + self.data + .make_contiguous() + .sort_by(|a, b| compare(a.as_ref(), b.as_ref())); + } + } + self.is_sorted = true; + true + } + + /// Returns a cursor to the smallest (front) element without removing it, + /// or `None` if the heap is empty. + /// + /// Sorts the heap first if necessary. + #[inline] + pub fn front(&mut self, compare: impl Fn(&T, &T) -> std::cmp::Ordering) -> Option> { + self.sort_by(compare); + self.data.front().copied().map(Cursor) + } + + /// Returns a cursor to the largest (back) element without removing it, + /// or `None` if the heap is empty. + /// + /// Sorts the heap first if necessary. + #[inline] + pub fn back(&mut self, compare: impl Fn(&T, &T) -> std::cmp::Ordering) -> Option> { + self.sort_by(compare); + self.data.back().copied().map(Cursor) + } + + /// Returns a cursor to the element at position `index`, or `None` if out + /// of bounds. + /// + /// The index is only meaningful after the heap has been sorted — consider + /// calling [`sort_by`](Self::sort_by) first. + #[inline] + pub fn get(&self, index: usize) -> Option> { + self.data.get(index).copied().map(Cursor) + } + + /// Removes and returns the smallest (front) element, or `None` if empty. + /// + /// Sorts the heap first if necessary. + /// + /// # Complexity + /// O(n log n) when unsorted; O(1) when already sorted (front removal from + /// a `VecDeque` does not shift elements). + #[inline] + pub fn pop_front(&mut self, compare: impl Fn(&T, &T) -> std::cmp::Ordering) -> Option { + self.sort_by(compare); + self.unlink_front() + } + + /// Removes and returns the largest (back) element, or `None` if empty. + /// + /// Sorts the heap first if necessary. 
+ /// + /// # Complexity + /// O(n log n) when unsorted; O(1) when already sorted. + #[inline] + pub fn pop_back(&mut self, compare: impl Fn(&T, &T) -> std::cmp::Ordering) -> Option { + self.sort_by(compare); + self.unlink_back() + } + + /// Removes and returns the element identified by `cursor`. + /// + /// Sorts the heap first if necessary, then performs a linear scan to + /// locate the element by pointer identity. + /// + /// # Complexity + /// O(n log n) when unsorted; O(n) when already sorted. + pub fn remove( + &mut self, + cursor: Cursor, + compare: impl Fn(&T, &T) -> std::cmp::Ordering, + ) -> T { + debug_assert!(!self.data.is_empty()); + + // Fast path: single element — no need to sort or scan. + if self.data.len() == 1 { + return self.unlink_back().unwrap(); + } + + self.sort_by(compare); + + let index = self + .data + .iter() + .position(|ptr| cursor.0 == *ptr) + .expect("cursor does not belong to this LazyHeap"); + + // SAFETY: `index` was just returned by `position`, so it is in bounds. + // LazyHeap holds the sole Box for this pointer; the cursor is non-owning. + let ptr = unsafe { self.data.remove(index).unwrap_unchecked() }; + *unsafe { Box::from_raw(ptr.as_ptr()) } + } + + /// Returns an iterator that yields a [`Cursor`] for each element in sorted + /// order. + /// + /// Sorts the heap first if necessary. The returned [`Iter`] holds raw + /// pointers into the backing buffer; do not mutate or drop the heap while + /// it is alive. + #[inline] + pub fn iter(&mut self, compare: impl Fn(&T, &T) -> std::cmp::Ordering) -> RawIter { + self.sort_by(compare); + let (a, b) = self.data.as_slices(); + RawIter { + first: utils::RawSliceIter::new(a), + second: utils::RawSliceIter::new(b), + } + } + + /// Removes all elements, dropping each one. + /// + /// The heap is empty and considered sorted after this call. 
+ #[inline] + pub fn clear(&mut self) { + while self.unlink_back().is_some() {} + self.is_sorted = true; + } + + /// Shrinks the backing buffer's capacity as close to its current length + /// as possible. + #[inline] + pub fn shrink_to_fit(&mut self) { + self.data.shrink_to_fit(); + } +} + +impl Default for LazyHeap { + fn default() -> Self { + Self::new() + } +} + +unsafe impl<#[may_dangle] T> Drop for LazyHeap { + fn drop(&mut self) { + struct DropGuard<'a, T>(&'a mut LazyHeap); + + impl<'a, T> Drop for DropGuard<'a, T> { + fn drop(&mut self) { + // Continue the same loop we do below. This only runs when a destructor has + // panicked. If another one panics this will abort. + while self.0.unlink_back().is_some() {} + } + } + + // Wrap self so that if a destructor panics, we can try to keep looping + let guard = DropGuard(self); + while guard.0.unlink_back().is_some() {} + std::mem::forget(guard); + } +} + +/// A non-owning, pointer-sized handle to an element stored in a [`LazyHeap`]. +/// +/// Think of `Cursor` as a stable address you can cache in an external data +/// structure (e.g. `hashbrown::raw::RawTable`) and later hand back to +/// [`LazyHeap::remove`] for cheap lookup and removal. It carries **no +/// ownership**: every allocation is owned exclusively by the heap that +/// produced the cursor. +/// +/// Using a stale cursor is undefined behaviour. +#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)] +pub struct Cursor(NonNull); + +impl Cursor { + /// Returns a shared reference to the value this cursor points to. + /// + /// # Safety + /// The cursor must be valid (see the [type-level docs](Self)). + #[inline] + pub unsafe fn element(&self) -> &T { + self.0.as_ref() + } + + /// Returns a mutable reference to the value this cursor points to. + /// + /// If the mutation changes any field that affects sort order, you **must** + /// call [`LazyHeap::mark_unsorted`] afterwards so the heap re-sorts before + /// the next ordered operation. 
+ /// + /// # Safety + /// - The cursor must be valid (see the [type-level docs](Self)). + /// - No other reference to the same element may be alive simultaneously. + #[inline] + pub unsafe fn element_mut(&mut self) -> &mut T { + self.0.as_mut() + } + + /// Returns the raw pointer underlying this cursor. + /// + /// Prefer [`element`](Self::element) or [`element_mut`](Self::element_mut) for + /// element access. This exists for interoperability with APIs that require + /// a raw pointer (e.g. hashing into a `RawTable` by address). + /// + /// **Never** reconstruct a `Box` from this pointer — doing so transfers + /// ownership out of the heap and causes a double-free. + #[inline] + pub fn as_ptr(&self) -> *mut T { + self.0.as_ptr() + } +} + +/// Raw iterator for [`VecDeque`] which doesn't have lifetime. +/// +/// # Safety +/// You should track changes of [`VecDeque`] yourself. +pub struct RawIter { + first: utils::RawSliceIter>, + second: utils::RawSliceIter>, +} + +impl Iterator for RawIter { + type Item = Cursor; + + #[inline] + fn next(&mut self) -> Option { + match self.first.next() { + Some(val) => Some( + // SAFETY: `val` is a valid `NonNull>` pointing into the + // first slice of the `VecDeque`. The pointee is `Copy` and remains + // valid as long as the `VecDeque` is alive and unmodified, which the + // caller is required to uphold per this type's safety contract. + Cursor(unsafe { val.read() }), + ), + None => { + std::mem::swap(&mut self.first, &mut self.second); + // SAFETY: same as above. 
+ self.first.next().map(|val| Cursor(unsafe { val.read() })) + } + } + } +} + +unsafe impl Send for LazyHeap {} +unsafe impl Sync for LazyHeap {} +unsafe impl Send for RawIter {} +unsafe impl Sync for RawIter {} +unsafe impl Send for Cursor {} +unsafe impl Sync for Cursor {} diff --git a/src/internal/linked_list.rs b/src/internal/linked_list.rs index d7386ed..f9d0da1 100644 --- a/src/internal/linked_list.rs +++ b/src/internal/linked_list.rs @@ -17,11 +17,12 @@ impl Node { } } + #[allow(clippy::boxed_local)] fn into_element(self: Box) -> T { self.element } - pub fn element<'a>(&'a self) -> &'a T { + pub fn element(&self) -> &T { &self.element } } @@ -34,7 +35,7 @@ pub struct LinkedList { head: Option>>, tail: Option>>, len: usize, - marker: PhantomData>>, + _marker: PhantomData>>, } // private methods @@ -185,7 +186,7 @@ impl LinkedList { head: None, tail: None, len: 0, - marker: PhantomData, + _marker: PhantomData, } } @@ -216,7 +217,7 @@ impl LinkedList { head: self.head.take(), tail: self.tail.take(), len: mem::take(&mut self.len), - marker: PhantomData, + _marker: PhantomData, }); } diff --git a/src/internal/mod.rs b/src/internal/mod.rs index 852e7d8..937b4df 100644 --- a/src/internal/mod.rs +++ b/src/internal/mod.rs @@ -1,4 +1,5 @@ pub mod alias; +pub mod lazyheap; pub mod linked_list; pub mod onceinit; pub mod pickle; diff --git a/src/internal/utils.rs b/src/internal/utils.rs index 716a424..74772a6 100644 --- a/src/internal/utils.rs +++ b/src/internal/utils.rs @@ -344,3 +344,82 @@ impl From for Option { value.0 } } + +/// Immutable slice iterator without lifetime +/// +/// # Safety +/// - You should be sure about lifetimes, and pointers should be alive while this type is alive. +/// Any changes to pointers can cause *Undefined Behaviour*. +/// - It doesn't support `ZST`s. 
+pub(super) struct RawSliceIter { + pointer: std::ptr::NonNull, + index: usize, + len: usize, +} + +impl RawSliceIter { + /// Creates a new [`RawSliceIter`] + #[inline] + pub(super) fn new(slice: &[T]) -> Self { + let pointer: std::ptr::NonNull = std::ptr::NonNull::from(slice).cast(); + + Self { + pointer, + index: 0, + len: slice.len(), + } + } +} + +impl Iterator for RawSliceIter { + type Item = std::ptr::NonNull; + + #[inline] + fn next(&mut self) -> Option { + if self.index >= self.len { + None + } else { + let value = unsafe { self.pointer.add(self.index) }; + self.index += 1; + Some(value) + } + } +} + +unsafe impl Send for RawSliceIter {} +unsafe impl Sync for RawSliceIter {} + +/// Raw iterator for [`VecDeque`] which doesn't have lifetime. +/// +/// # Safety +/// You should track changes of [`VecDeque`] yourself. +pub struct RawVecDequeIter { + first: RawSliceIter, + second: RawSliceIter, +} + +impl RawVecDequeIter { + /// Creates a new [`RawVecDequeIter`] + #[inline] + pub fn new(first: &[T], second: &[T]) -> Self { + Self { + first: RawSliceIter::new(first), + second: RawSliceIter::new(second), + } + } +} + +impl Iterator for RawVecDequeIter { + type Item = std::ptr::NonNull; + + #[inline] + fn next(&mut self) -> Option { + match self.first.next() { + Some(val) => Some(val), + None => { + std::mem::swap(&mut self.first, &mut self.second); + self.first.next() + } + } + } +} diff --git a/src/lib.rs b/src/lib.rs index fe5f01f..d50fc12 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,6 +11,9 @@ pub mod internal; pub mod policies; pub mod pyclasses; +// TODO: refactor pickle +// TODO: refactor deepcopy + #[pyo3::pymodule] mod _core { #[allow(unused_imports)] @@ -59,6 +62,15 @@ mod _core { #[pymodule_export] use crate::pyclasses::lrucache::PyLRUCacheValues; + #[pymodule_export] + use crate::pyclasses::lfucache::PyLFUCache; + #[pymodule_export] + use crate::pyclasses::lfucache::PyLFUCacheItems; + #[pymodule_export] + use 
crate::pyclasses::lfucache::PyLFUCacheKeys; + #[pymodule_export] + use crate::pyclasses::lfucache::PyLFUCacheValues; + #[pymodule_init] pub fn init(m: &pyo3::Bound<'_, pyo3::types::PyModule>) -> pyo3::PyResult<()> { typeref::initialize_typeref(m.py()); diff --git a/src/policies/common.rs b/src/policies/common.rs index 7d35b8b..ecfc9a6 100644 --- a/src/policies/common.rs +++ b/src/policies/common.rs @@ -144,82 +144,3 @@ impl traits::SharedExt for Shared { } } } - -/// Immutable slice iterator without lifetime -/// -/// # Safety -/// - You should be sure about lifetimes, and pointers should be alive while this type is alive. -/// Any changes to pointers can cause *Undefined Behaviour*. -/// - It doesn't support `ZST`s. -struct RawSliceIter { - pointer: std::ptr::NonNull, - index: usize, - len: usize, -} - -impl RawSliceIter { - /// Creates a new [`RawSliceIter`] - #[inline] - fn new(slice: &[T]) -> Self { - let pointer: std::ptr::NonNull = std::ptr::NonNull::from(slice).cast(); - - Self { - pointer, - index: 0, - len: slice.len(), - } - } -} - -impl Iterator for RawSliceIter { - type Item = std::ptr::NonNull; - - #[inline] - fn next(&mut self) -> Option { - if self.index >= self.len { - None - } else { - let value = unsafe { self.pointer.add(self.index) }; - self.index += 1; - Some(value) - } - } -} - -unsafe impl Send for RawSliceIter {} -unsafe impl Sync for RawSliceIter {} - -/// Raw iterator for [`VecDeque`] which doesn't have lifetime. -/// -/// # Safety -/// You should track changes of [`VecDeque`] yourself. 
-pub struct RawVecDequeIter { - first: RawSliceIter, - second: RawSliceIter, -} - -impl RawVecDequeIter { - /// Creates a new [`RawVecDequeIter`] - #[inline] - pub fn new(first: &[T], second: &[T]) -> Self { - Self { - first: RawSliceIter::new(first), - second: RawSliceIter::new(second), - } - } -} - -impl Iterator for RawVecDequeIter { - type Item = std::ptr::NonNull; - - #[inline] - fn next(&mut self) -> Option { - match self.first.next() { - Some(val) => Some(val), - None => { - std::mem::swap(&mut self.first, &mut self.second); - self.first.next() - } - } - } -} diff --git a/src/policies/fifopolicy.rs b/src/policies/fifopolicy.rs index 748204e..e0f8436 100644 --- a/src/policies/fifopolicy.rs +++ b/src/policies/fifopolicy.rs @@ -2,7 +2,6 @@ use std::collections::VecDeque; use crate::hashbrown; use crate::internal::utils; -use crate::policies::common::RawVecDequeIter; use crate::policies::traits; use crate::policies::traits::HandleExt; use crate::policies::traits::PolicyExt; @@ -168,7 +167,7 @@ impl FIFOPolicy { } #[inline] - pub fn vecdeque(&self) -> &VecDeque { + pub fn entries(&self) -> &VecDeque { &self.entries } @@ -235,9 +234,9 @@ impl FIFOPolicy { } #[inline] - pub unsafe fn iter(&self) -> RawVecDequeIter { + pub fn iter(&self) -> utils::RawVecDequeIter { let (first, second) = self.entries.as_slices(); - RawVecDequeIter::new(first, second) + utils::RawVecDequeIter::new(first, second) } } @@ -269,7 +268,7 @@ impl PolicyExt for FIFOPolicy { let eq = |index: &usize| { self.entries[(*index) - self.front_offset] .key() - .py_eq(py, &key) + .py_eq(py, key) }; match self.table.get(key.hash(), eq)? { Some(index) => Ok(Some(&self.entries[(*index) - self.front_offset])), @@ -286,7 +285,7 @@ impl PolicyExt for FIFOPolicy { let eq = |index: &usize| { self.entries[(*index) - self.front_offset] .key() - .py_eq(py, &key) + .py_eq(py, key) }; match self.table.find(key.hash(), eq)? 
{ Some(bucket) => { @@ -336,23 +335,12 @@ impl PolicyExt for FIFOPolicy { #[inline] fn shrink_to_fit(&mut self, shared: &Self::Shared) { - // Shrink table - let initial = self.table.capacity(); + shared.generation_version().increment(); + self.table.shrink_to(0, |index| { self.entries[(*index) - self.front_offset].key().hash() }); - - if initial != self.table.capacity() { - shared.generation_version().increment(); - } - - // Shrink entries - let initial = self.entries.capacity(); self.entries.shrink_to_fit(); - - if initial != self.entries.capacity() { - shared.generation_version().increment(); - } } #[inline] @@ -423,7 +411,7 @@ impl PolicyExt for FIFOPolicy { Ok(result) } - fn clone_ref(&self, py: pyo3::Python<'_>) -> Self { + fn clone_ref(&mut self, py: pyo3::Python<'_>) -> Self { let mut entries = VecDeque::with_capacity(self.entries.len()); for handle in self.entries.iter() { entries.push_back(handle.clone_ref(py)); diff --git a/src/policies/lfupolicy.rs b/src/policies/lfupolicy.rs new file mode 100644 index 0000000..47305e3 --- /dev/null +++ b/src/policies/lfupolicy.rs @@ -0,0 +1,477 @@ +use crate::hashbrown; +use crate::internal::alias; +use crate::internal::lazyheap; +use crate::internal::utils; +use crate::policies::traits; +use crate::policies::traits::HandleExt; +use crate::policies::traits::PolicyExt; +use crate::policies::traits::SharedExt; + +pub use crate::policies::common::Shared; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +#[repr(transparent)] +pub struct Frequency(usize); + +impl Frequency { + #[inline(always)] + fn increment(&mut self) { + self.0 = self.0.saturating_add(1) + } +} + +/// Same as [`Handle`](struct@super::common::Handle), but with a frequency counter. 
+pub struct FrequencyHandle { + key: utils::PrecomputedHashObject, + value: alias::PyObject, + size: usize, + frequency: Frequency, +} + +impl FrequencyHandle { + /// Creates a new [`FrequencyHandle`] + #[inline] + pub fn new( + py: pyo3::Python<'_>, + getsizeof: &utils::GetsizeofFunction, + key: alias::PyObject, + value: alias::PyObject, + // initial frequency + frequency: usize, + ) -> pyo3::PyResult { + Self::with_precomputed_hash_key( + py, + getsizeof, + utils::PrecomputedHashObject::new(py, key)?, + value, + frequency, + ) + } + + /// Creates a new [`FrequencyHandle`] from an already-hashed key. + #[inline] + pub fn with_precomputed_hash_key( + py: pyo3::Python<'_>, + getsizeof: &utils::GetsizeofFunction, + key: utils::PrecomputedHashObject, + value: alias::PyObject, + // initial frequency + frequency: usize, + ) -> pyo3::PyResult { + let size = getsizeof.call(py, key.as_ref(), &value)?; + Ok(Self { + key, + value, + size, + frequency: Frequency(frequency), + }) + } + + /// Returns the frequency. + #[inline] + pub fn frequency(&self) -> usize { + self.frequency.0 + } + + /// Consumes `self` and returns the [`utils::PrecomputedHashObject`]. + #[inline] + pub fn into_key(self) -> utils::PrecomputedHashObject { + self.key + } + + /// Returns a reference to the value. + #[inline] + pub fn value(&self) -> &alias::PyObject { + &self.value + } + + /// Consumes `self` and returns the value of the pair. + #[inline] + pub fn into_value(self) -> alias::PyObject { + self.value + } + + /// Consumes `self` and returns the pair. + #[inline] + pub fn into_pair(self) -> (utils::PrecomputedHashObject, alias::PyObject) { + (self.key, self.value) + } + + /// Makes a clone of self. + /// + /// This creates another pointer to the same object, increasing its reference count. 
+ #[inline] + pub fn clone_ref(&self, py: pyo3::Python<'_>) -> Self { + Self { + key: self.key.clone_ref(py), + value: self.value.clone_ref(py), + size: self.size, + frequency: self.frequency, + } + } +} + +impl HandleExt for FrequencyHandle { + type Key = utils::PrecomputedHashObject; + + #[inline(always)] + fn key(&self) -> &utils::PrecomputedHashObject { + &self.key + } + + #[inline(always)] + fn size(&self) -> usize { + self.size + } +} + +/// A view into an occupied entry in [`LFUPolicy`]. +pub struct Occupied<'a> { + /// The parent storage that owns the hash table. + policy: &'a mut LFUPolicy, + /// The shared configuration + shared: &'a Shared, + /// Raw bucket pointing to the occupied index. + bucket: hashbrown::raw::Bucket>, +} + +impl traits::EntryExt for Occupied<'_> { + type Handle = FrequencyHandle; + type Shared = Shared; + + #[inline] + fn would_exceed(&self, extra_size: usize) -> bool { + let handle = unsafe { self.bucket.as_ref().element() }; + + self.policy + .currsize + .saturating_add(extra_size) + .saturating_sub(handle.size()) + > self.shared.maxsize() + } + + #[inline] + fn evict(&mut self, py: pyo3::Python) -> pyo3::PyResult { + self.policy.evict(py, self.shared) + } +} + +impl traits::OccupiedExt for Occupied<'_> { + fn replace(self, new: Self::Handle) -> Self::Handle { + // Here we don't need to increment generation version + // self.shared.generation_version().increment(); + + unsafe { + let cursor = self.bucket.as_mut(); + + self.policy.currsize = self + .policy + .currsize + .saturating_sub(cursor.element().size()) + .saturating_add(new.size()); + + let old = std::mem::replace(cursor.element_mut(), new); + + cursor.element_mut().frequency.increment(); + self.policy.heap.mark_unsorted(); + + old + } + } + + #[inline] + fn remove(self) -> Self::Handle { + self.shared.generation_version().increment(); + + let (cursor, _) = unsafe { self.policy.table.remove(self.bucket) }; + let item = self + .policy + .heap + .remove(cursor, |x, y| 
x.frequency.cmp(&y.frequency)); + + self.policy.currsize = self.policy.currsize.saturating_sub(item.size()); + item + } +} + +/// A view into a vacant slot in [`LFUPolicy`]. +pub struct Vacant<'a> { + /// The parent policy that owns the hash table. + policy: &'a mut LFUPolicy, + /// The shared configuration + shared: &'a Shared, +} + +impl traits::EntryExt for Vacant<'_> { + type Handle = FrequencyHandle; + type Shared = Shared; + + #[inline] + fn would_exceed(&self, extra_size: usize) -> bool { + self.policy.currsize.saturating_add(extra_size) > self.shared.maxsize() + } + + #[inline] + fn evict(&mut self, py: pyo3::Python) -> pyo3::PyResult { + self.policy.evict(py, self.shared) + } +} + +impl traits::VacantExt for Vacant<'_> { + fn insert(self, handle: Self::Handle) { + self.shared.generation_version().increment(); + + self.policy.currsize = self.policy.currsize.saturating_add(handle.size()); + + let hash = handle.key().hash(); + let cursor = self.policy.heap.push(handle); + + self.policy + .table + .insert(hash, cursor, |x| unsafe { x.element().key().hash() }); + } +} + +pub struct LFUPolicy { + /// Maps each key to its cursor into [`LFUPolicy::heap`], enabling O(1) lookups. + table: hashbrown::raw::RawTable>, + + /// A lazy binary heap. + heap: lazyheap::LazyHeap, + + /// Running total of all stored handles' sizes, maintained incrementally. + currsize: usize, +} + +impl LFUPolicy { + /// Creates a new [`LFUPolicy`]. + /// + /// The underlying hash map is pre-allocated to hold at least `capacity` entries + /// without reallocation. 
+ pub fn new(capacity: usize) -> Self { + Self { + table: hashbrown::raw::RawTable::with_capacity(capacity), + heap: lazyheap::LazyHeap::new(), + currsize: 0, + } + } + + #[inline] + pub fn table(&self) -> &hashbrown::raw::RawTable> { + &self.table + } + + #[inline] + pub fn heap(&self) -> &lazyheap::LazyHeap { + &self.heap + } + + #[inline] + pub fn heap_mut(&mut self) -> &mut lazyheap::LazyHeap { + &mut self.heap + } + + #[inline] + pub fn peek( + &self, + py: pyo3::Python, + key: &utils::PrecomputedHashObject, + ) -> pyo3::PyResult> { + unsafe { + let bucket = self + .table + .find(key.hash(), |cursor| key.py_eq(py, &cursor.element().key))?; + + Ok(bucket.map(|x| x.as_ref().element())) + } + } +} + +impl PolicyExt for LFUPolicy { + type Shared = Shared; + type Handle = FrequencyHandle; + + type Occupied<'a> + = Occupied<'a> + where + Self: 'a; + + type Vacant<'a> + = Vacant<'a> + where + Self: 'a; + + #[inline] + fn current_size(&self) -> usize { + self.currsize + } + + #[inline] + fn get( + &mut self, + py: pyo3::Python, + key: &::Key, + ) -> pyo3::PyResult> { + let cursor = self + .table + .get_mut(key.hash(), |x| unsafe { key.py_eq(py, &x.element().key) })?; + + match cursor { + Some(cursor) => unsafe { + // A hit bumps the frequency; flag the lazy heap dirty (as `Occupied::replace` does) so eviction re-sorts first + cursor.element_mut().frequency.increment(); + self.heap.mark_unsorted(); + Ok(Some(cursor.element())) + }, + None => Ok(None), + } + } + + fn entry<'a>( + &'a mut self, + py: pyo3::Python, + key: &::Key, + shared: &'a Self::Shared, + ) -> pyo3::PyResult, Self::Vacant<'a>>> { + let eq = |cursor: &lazyheap::Cursor| unsafe { + key.py_eq(py, cursor.element().key()) + }; + + match self.table.find(key.hash(), eq)? 
{ + Some(bucket) => { + let result = Occupied { + policy: self, + shared, + bucket, + }; + Ok(traits::PolicyEntry::Occupied(result)) + } + None => { + let result = Vacant { + policy: self, + shared, + }; + Ok(traits::PolicyEntry::Vacant(result)) + } + } + } + + fn evict(&mut self, _py: pyo3::Python, shared: &Self::Shared) -> pyo3::PyResult { + { + let front_cursor = self + .heap + .front(|x, y| x.frequency.cmp(&y.frequency)) + .ok_or_else(|| new_py_error!(PyKeyError, "cache is empty"))?; + + self.table + .remove_entry(unsafe { front_cursor.element().key.hash() }, |x| { + Ok::<_, pyo3::PyErr>(std::ptr::eq(front_cursor.as_ptr(), x.as_ptr())) + })? + .expect("evict: item not found in table"); + } + + shared.generation_version().increment(); + + let handle = self + .heap + .pop_front(|x, y| x.frequency.cmp(&y.frequency)) + .unwrap(); + + self.currsize = self.currsize.saturating_sub(handle.size); + Ok(handle) + } + + fn clear(&mut self, shared: &Self::Shared) { + if self.heap.is_empty() { + return; + } + + shared.generation_version().increment(); + self.table.clear_no_drop(); + self.heap.clear(); + self.currsize = 0; + } + + fn shrink_to_fit(&mut self, shared: &Self::Shared) { + shared.generation_version().increment(); + + self.table + .shrink_to(0, |x| unsafe { x.element().key.hash() }); + + self.heap.shrink_to_fit(); + } + + fn py_eq( + &self, + py: pyo3::Python, + shared: &Self::Shared, + other: &Self, + other_shared: &Self::Shared, + ) -> pyo3::PyResult { + if shared.maxsize() != other_shared.maxsize() || self.table.len() != other.table.len() { + return Ok(false); + } + + let mut error = None; + let result = unsafe { + let mut iterator = self.table.iter().map(|x| x.as_ref()); + + iterator.all(|cursor_1| { + let handle_1 = cursor_1.element(); + + let result = other.table.get(handle_1.key().hash(), |cursor| { + handle_1.key().py_eq(py, cursor.element().key()) + }); + + match result { + Err(e) => { + error = Some(e); + // Return false to break the `.all` loop + false + 
} + Ok(None) => false, + Ok(Some(cursor_2)) => { + let handle_2 = cursor_2.element(); + + match utils::pyobject_equal( + py, + handle_1.value.as_ptr(), + handle_2.value.as_ptr(), + ) { + Ok(result) => result, + Err(e) => { + error = Some(e); + // Return false to break the `.all` loop + false + } + } + } + } + }) + }; + + if let Some(error) = error { + return Err(error); + } + Ok(result) + } + + fn clone_ref(&mut self, py: pyo3::Python) -> Self { + let mut table = hashbrown::raw::RawTable::with_capacity(self.table.len()); + let mut heap = lazyheap::LazyHeap::new(); + + unsafe { + for cursor in self.heap.iter(|x, y| x.frequency.cmp(&y.frequency)) { + let cloned_handle = cursor.element().clone_ref(py); + let new_cursor = heap.push(cloned_handle); + table.insert_no_grow(new_cursor.element().key().hash(), new_cursor); + } + } + + Self { + table, + heap, + currsize: self.currsize, + } + } +} diff --git a/src/policies/lrupolicy.rs b/src/policies/lrupolicy.rs index f07c345..def7d84 100644 --- a/src/policies/lrupolicy.rs +++ b/src/policies/lrupolicy.rs @@ -54,7 +54,7 @@ impl traits::OccupiedExt for Occupied<'_> { .saturating_add(new.size()); let old = std::mem::replace(cursor.element_mut(), new); - cursor.move_to_back(&mut self.policy.entries); + cursor.move_to_back(&mut self.policy.list); old } @@ -65,7 +65,7 @@ impl traits::OccupiedExt for Occupied<'_> { self.shared.generation_version().increment(); let (cursor, _) = unsafe { self.policy.table.remove(self.bucket) }; - let item = unsafe { cursor.unlink(&mut self.policy.entries) }; + let item = unsafe { cursor.unlink(&mut self.policy.list) }; self.policy.currsize = self.policy.currsize.saturating_sub(item.size()); item @@ -102,7 +102,7 @@ impl traits::VacantExt for Vacant<'_> { self.policy.currsize = self.policy.currsize.saturating_add(handle.size()); let hash = handle.key().hash(); - let cursor = self.policy.entries.push_back(handle); + let cursor = self.policy.list.push_back(handle); self.policy .table @@ -111,11 +111,11 
@@ impl traits::VacantExt for Vacant<'_> { } pub struct LRUPolicy { - /// Maps each key to its node pointer into [`FIFOPolicy::entries`], enabling O(1) lookups. + /// Maps each key to its node pointer into [`LRUPolicy::list`], enabling O(1) lookups. table: hashbrown::raw::RawTable>, /// A doubly-linked list, which holds cached handles, providing O(1) pops (front/back) and pushes (front/back). - entries: linked_list::LinkedList, + list: linked_list::LinkedList, /// Running total of all stored handles' sizes, maintained incrementally. currsize: usize, @@ -129,7 +129,7 @@ impl LRUPolicy { pub fn new(capacity: usize) -> Self { Self { table: hashbrown::raw::RawTable::with_capacity(capacity), - entries: linked_list::LinkedList::new(), + list: linked_list::LinkedList::new(), currsize: 0, } } @@ -140,8 +140,8 @@ impl LRUPolicy { } #[inline] - pub fn linked_list(&self) -> &linked_list::LinkedList { - &self.entries + pub fn list(&self) -> &linked_list::LinkedList { + &self.list } #[inline] @@ -188,12 +188,12 @@ impl PolicyExt for LRUPolicy { unsafe { let bucket = self .table - .find(key.hash(), |cursor| key.py_eq(py, cursor.element().key()))?; + .get(key.hash(), |cursor| key.py_eq(py, cursor.element().key()))?; match bucket { Some(cursor) => { - cursor.as_mut().move_to_back(&mut self.entries); - Ok(Some(cursor.as_ref().element())) + cursor.move_to_back(&mut self.list); + Ok(Some(cursor.element())) } None => Ok(None), } @@ -230,7 +230,7 @@ impl PolicyExt for LRUPolicy { fn evict(&mut self, _py: pyo3::Python, shared: &Self::Shared) -> pyo3::PyResult { { - let front_cursor = match self.entries.cursor_front() { + let front_cursor = match self.list.cursor_front() { Some(x) => x, None => return Err(new_py_error!(PyKeyError, "cache is empty")), }; @@ -243,31 +243,26 @@ impl PolicyExt for LRUPolicy { .expect("evict: key not found in table."); } - let handle = unsafe { self.entries.pop_front().unwrap_unchecked() }; + let handle = unsafe { self.list.pop_front().unwrap_unchecked() }; 
self.currsize = self.currsize.saturating_sub(handle.size()); Ok(handle) } #[inline] - fn shrink_to_fit(&mut self, shared: &Self::Shared) { - let initial = self.table.capacity(); + fn shrink_to_fit(&mut self, _shared: &Self::Shared) { self.table .shrink_to(0, |cursor| unsafe { cursor.element().key().hash() }); - - if initial != self.table.capacity() { - shared.generation_version().increment(); - } } #[inline] fn clear(&mut self, shared: &Self::Shared) { - if self.entries.is_empty() { + if self.list.is_empty() { return; } shared.generation_version().increment(); self.table.clear_no_drop(); - self.entries.clear(); + self.list.clear(); self.currsize = 0; } @@ -325,12 +320,12 @@ impl PolicyExt for LRUPolicy { Ok(result) } - fn clone_ref(&self, py: pyo3::Python<'_>) -> Self { - let mut table = hashbrown::raw::RawTable::with_capacity(self.entries.len()); + fn clone_ref(&mut self, py: pyo3::Python<'_>) -> Self { + let mut table = hashbrown::raw::RawTable::with_capacity(self.list.len()); let mut entries = linked_list::LinkedList::new(); unsafe { - for cursor in self.entries.iter() { + for cursor in self.list.iter() { let cloned_handle = cursor.element().clone_ref(py); let new_cursor = entries.push_back(cloned_handle); table.insert_no_grow(new_cursor.element().key().hash(), new_cursor); @@ -339,7 +334,7 @@ impl PolicyExt for LRUPolicy { Self { table, - entries, + list: entries, currsize: self.currsize, } } diff --git a/src/policies/mod.rs b/src/policies/mod.rs index d8c6784..257fa96 100644 --- a/src/policies/mod.rs +++ b/src/policies/mod.rs @@ -2,6 +2,7 @@ pub mod common; pub mod traits; pub mod fifopolicy; +pub mod lfupolicy; pub mod lrupolicy; pub mod nopolicy; pub mod rrpolicy; diff --git a/src/policies/nopolicy.rs b/src/policies/nopolicy.rs index 60de168..391cace 100644 --- a/src/policies/nopolicy.rs +++ b/src/policies/nopolicy.rs @@ -190,12 +190,8 @@ impl traits::PolicyExt for NoPolicy { #[inline] fn shrink_to_fit(&mut self, shared: &Self::Shared) { - let initial = 
self.table.capacity(); + shared.generation_version().increment(); self.table.shrink_to(0, |x| x.key().hash()); - - if initial != self.table.capacity() { - shared.generation_version().increment(); - } } #[inline] @@ -258,7 +254,7 @@ impl traits::PolicyExt for NoPolicy { Ok(result) } - fn clone_ref(&self, py: pyo3::Python<'_>) -> Self { + fn clone_ref(&mut self, py: pyo3::Python<'_>) -> Self { let mut table = hashbrown::raw::RawTable::with_capacity(self.table.capacity()); unsafe { diff --git a/src/policies/rrpolicy.rs b/src/policies/rrpolicy.rs index 175229a..f1cef32 100644 --- a/src/policies/rrpolicy.rs +++ b/src/policies/rrpolicy.rs @@ -201,12 +201,8 @@ impl PolicyExt for RRPolicy { #[inline] fn shrink_to_fit(&mut self, shared: &Self::Shared) { - let initial = self.table.capacity(); + shared.generation_version().increment(); self.table.shrink_to(0, |x| x.key().hash()); - - if initial != self.table.capacity() { - shared.generation_version().increment(); - } } #[inline] @@ -269,7 +265,7 @@ impl PolicyExt for RRPolicy { Ok(result) } - fn clone_ref(&self, py: pyo3::Python<'_>) -> Self { + fn clone_ref(&mut self, py: pyo3::Python<'_>) -> Self { let mut table = hashbrown::raw::RawTable::with_capacity(self.table.capacity()); unsafe { diff --git a/src/policies/traits.rs b/src/policies/traits.rs index a3aa8b4..d3a5153 100644 --- a/src/policies/traits.rs +++ b/src/policies/traits.rs @@ -116,14 +116,6 @@ pub trait PolicyExt { ) -> pyo3::PyResult, Self::Vacant<'a>>>; /// Evicts a handle according to the policy algorithm, returning it. - /// - /// # Errors - /// - /// Returns `Err` if dropping the evicted value raises a Python exception. - /// - /// # Panics - /// - /// May panic if the policy is empty. fn evict(&mut self, py: pyo3::Python, shared: &Self::Shared) -> pyo3::PyResult; /// Removes all handles without shrinking the allocation. @@ -142,5 +134,5 @@ pub trait PolicyExt { ) -> pyo3::PyResult; /// Make a clone of `self`. 
- fn clone_ref(&self, py: pyo3::Python) -> Self; + fn clone_ref(&mut self, py: pyo3::Python) -> Self; } diff --git a/src/pyclasses/fifocache.rs b/src/pyclasses/fifocache.rs index 2a7f1c1..65b01f4 100644 --- a/src/pyclasses/fifocache.rs +++ b/src/pyclasses/fifocache.rs @@ -1,7 +1,6 @@ use crate::internal::alias; use crate::internal::onceinit; use crate::internal::utils; -use crate::policies::common::RawVecDequeIter; use crate::policies::fifopolicy; use crate::policies::traits::HandleExt; use crate::policies::traits::PolicyExt; @@ -147,7 +146,7 @@ impl PyFIFOCache { let inner = self.0.get(); let policy = inner.policy(); - policy.table().capacity().min(policy.vecdeque().capacity()) + policy.table().capacity().min(policy.entries().capacity()) } /// Returns the number of entries currently in the cache. @@ -156,7 +155,7 @@ impl PyFIFOCache { let inner = self.0.get(); let policy = inner.policy(); - debug_assert!(policy.table().len() == policy.vecdeque().len()); + debug_assert!(policy.table().len() == policy.entries().len()); policy.table().len() } @@ -166,7 +165,7 @@ impl PyFIFOCache { let policy = inner.policy(); let table_cap = policy.table().capacity() * std::mem::size_of::(); - let vecdeque_cap = policy.vecdeque().capacity() * std::mem::size_of::(); + let vecdeque_cap = policy.entries().capacity() * std::mem::size_of::(); table_cap + vecdeque_cap } @@ -498,7 +497,7 @@ impl PyFIFOCache { // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] let result = PyFIFOCacheItems { - iter: parking_lot::Mutex::new(unsafe { inner.policy().iter() }), + iter: parking_lot::Mutex::new(inner.policy().iter()), gv, initial_gv, }; @@ -512,7 +511,7 @@ impl PyFIFOCache { // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] let result = PyFIFOCacheValues { - iter: parking_lot::Mutex::new(unsafe { inner.policy().iter() }), + iter: parking_lot::Mutex::new(inner.policy().iter()), gv, initial_gv, }; @@ -526,7 
+525,7 @@ impl PyFIFOCache { // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] let result = PyFIFOCacheKeys { - iter: parking_lot::Mutex::new(unsafe { inner.policy().iter() }), + iter: parking_lot::Mutex::new(inner.policy().iter()), gv, initial_gv, }; @@ -556,7 +555,7 @@ impl PyFIFOCache { let shared = inner.shared(); let policy = inner.policy(); - let iter = policy.vecdeque().iter().map(|handle| { + let iter = policy.entries().iter().map(|handle| { ( // Without using `.bind` it returns something like `Py(addr)` handle.key().as_ref().bind(py), @@ -584,13 +583,13 @@ impl PyFIFOCache { let policy = inner.policy(); if n < 0 { - n = (policy.vecdeque().len() as isize) + n; + n += policy.entries().len() as isize; } if n < 0 { return Err(new_py_error!(PyIndexError, "`n` out of range")); } - match policy.vecdeque().get(n as usize) { + match policy.entries().get(n as usize) { Some(handle) => Ok(handle.key().as_ref().clone_ref(py)), None => Err(new_py_error!(PyIndexError, "`n` out of range")), } @@ -599,7 +598,7 @@ impl PyFIFOCache { fn last(&self, py: pyo3::Python) -> pyo3::PyResult { let inner = self.0.get(); let policy = inner.policy(); - match policy.vecdeque().back() { + match policy.entries().back() { Some(handle) => Ok(handle.key().as_ref().clone_ref(py)), None => Err(new_py_error!(PyIndexError, "`n` out of range")), } @@ -609,7 +608,7 @@ impl PyFIFOCache { let inner = self.0.get(); let policy = inner.policy(); - for handle in policy.vecdeque().iter() { + for handle in policy.entries().iter() { visit.call(handle.key().as_ref())?; visit.call(handle.value())?; } @@ -637,7 +636,7 @@ macro_rules! 
implement_iterator { $name as $pyname { initial_gv: u32, gv: utils::GenerationVersion, - iter: parking_lot::Mutex>, + iter: parking_lot::Mutex>, } } diff --git a/src/pyclasses/lfucache.rs b/src/pyclasses/lfucache.rs new file mode 100644 index 0000000..a895114 --- /dev/null +++ b/src/pyclasses/lfucache.rs @@ -0,0 +1,773 @@ +use crate::internal::alias; +use crate::internal::lazyheap; +use crate::internal::onceinit; +use crate::internal::utils; +use crate::policies::lfupolicy; +use crate::policies::traits::HandleExt; +use crate::policies::traits::PolicyExt; +use crate::policies::traits::SharedExt; +use crate::policies::wrapped::Wrapped; + +implement_pyclass! { + /// A Least-Frequently-Used (LFU) cache eviction policy: when the cache is full, the item + /// with the lowest access count is evicted first. Ties in frequency are broken by recency - + /// among equally rare items, the oldest is evicted. + /// + /// ## How It Works + /// The LFU algorithm tracks how many times each cached item has been accessed, and always + /// evicts the item with the smallest count. This makes it well-suited for workloads where + /// some items are structurally "hot" and where that frequency signal is stable enough to + /// be worth preserving across cache pressure events. + /// + /// This implementation uses a `lazy binary min-heap` keyed on access frequency, paired with + /// a `hash map` that maps each key to its cursor (a stable pointer into the heap's backing + /// buffer). The heap is "lazy" in the sense that it does not restore the heap invariant after + /// every frequency increment; instead it sets a dirty flag and defers the full re-sort until + /// the next eviction. This amortises the cost of heap maintenance across many hits, so + /// read-heavy workloads pay far less per operation than a classic eager heap would require. + /// + /// On a cache hit, the item's frequency counter is incremented in O(1) and the heap is marked + /// dirty. 
On eviction, the heap is sorted if dirty, and the minimum-frequency item is popped + /// in O(n log n) worst-case (amortised O(log n) under typical access distributions). Lookups + /// are O(1) via the hash map. + /// + /// ### Pros + /// - Frequency-aware eviction. Items that are accessed often are protected from eviction even + /// under heavy cache pressure, leading to higher hit rates on skewed workloads. + /// - O(1) cache hits. Incrementing a counter and marking the heap dirty is constant-time work, + /// with no structural reorganisation on the hot path. + /// - Lazy heap sorting amortises O(n log n) sort cost across many inserts and hits, keeping + /// the average cost per operation much lower than a naive eager implementation. + /// + /// ### Cons + /// - Eviction is O(n log n) worst-case. If the heap is maximally dirty (every entry modified + /// since last sort), a single eviction triggers a full re-sort over all entries. This is + /// amortised away in practice but introduces latency spikes under adversarial access patterns. + /// - Frequency counters accumulate indefinitely. A key that was hot during an early burst remains + /// privileged long after traffic shifts, causing "cache pollution" - stale items that monopolise + /// capacity because of historical frequency, not current utility. + /// - Access patterns must be skewed for LFU to outperform simpler policies. On uniform workloads, + /// frequency counters provide no signal and the extra bookkeeping is pure overhead. + /// + /// ## When to use it + /// Reach for `LFUPolicy` when: + /// - Your workload has a stable hot set: a minority of keys that are accessed disproportionately + /// often and whose relative popularity changes slowly over time. 
+ /// - Cache pollution from one-time scans is a concern: LFU naturally resists large sequential reads + /// from displacing frequently accessed items, because freshly inserted keys start at count 1 and + /// are evicted before any item with accumulated hits. + /// - Hit rate matters more than worst-case eviction latency: the amortised cost is low, but if your + /// system has hard real-time latency requirements, the occasional sort spike may be unacceptable. + /// + /// Avoid it when access patterns shift rapidly. If the "hot" subset of keys changes frequently, + /// frequency counters become stale signals and LFU will evict items that have recently become + /// popular. In those cases, an LRU policy - which tracks recency rather than frequency - will + /// adapt faster and typically deliver better hit rates. + /// + /// Avoid it on uniform workloads where all keys are accessed with roughly equal probability. + /// The frequency signal provides no meaningful discrimination, and the overhead of maintaining + /// counters and a heap is wasted compared to the simpler bookkeeping of FIFO or LRU. + [subclass, extends=crate::pyclasses::base::PyBaseCacheImpl, generic, frozen] + PyLFUCache as "LFUCache" (onceinit::OnceInit>); +} + +#[pyo3::pymethods] +impl PyLFUCache { + #[new] + #[allow(unused_variables)] + #[pyo3(signature=(*args, **kwds))] + fn __new__( + args: alias::ArgsType, + kwds: Option, + ) -> (Self, crate::pyclasses::base::PyBaseCacheImpl) { + ( + Self(onceinit::OnceInit::uninit()), + crate::pyclasses::base::PyBaseCacheImpl, + ) + } + + /// Initialize a new `LFUCache` instance. + /// + /// Args: + /// maxsize: Maximum number of elements the cache can hold. + /// iterable: Initial data to populate the cache. + /// capacity: Pre-allocate capacity to minimize reallocations. Defaults to 0. + /// getsizeof: A callable that computes the size of a key-value pair. When `None`, each + /// entry is assumed to have a size of 1 (equivalent to `lambda k, v: 1`). 
+ /// Use this to implement weighted caching — for example, sizing entries by + /// memory footprint or byte length. + /// + /// The cache can be pre-sized via `capacity` to reduce hash table reallocations when + /// the number of expected entries is known ahead of time. + #[pyo3(signature=(maxsize, iterable=None, *, capacity=0, getsizeof=None))] + fn __init__( + &self, + py: pyo3::Python, + maxsize: usize, + iterable: Option, + capacity: usize, + getsizeof: Option, + ) -> pyo3::PyResult<()> { + let wrapped = Wrapped::new( + lfupolicy::LFUPolicy::new(capacity), + lfupolicy::Shared::new(maxsize, getsizeof), + ); + + if let Some(iterable) = iterable { + let getsizeof = wrapped.shared().getsizeof().clone_ref(py); + + let result = wrapped.extend( + // iterable object + iterable, + // transform function + |key, value| lfupolicy::FrequencyHandle::new(py, &getsizeof, key, value, 1), + ); + self.0.set(wrapped); + result + } else { + self.0.set(wrapped); + Ok(()) + } + } + + #[getter] + #[inline] + fn maxsize(&self) -> usize { + let inner = self.0.get(); + inner.shared().maxsize() + } + + #[inline] + fn current_size(&self) -> usize { + let inner = self.0.get(); + inner.policy().current_size() + } + + #[inline] + fn remaining_size(&self) -> usize { + let inner = self.0.get(); + inner.remaining_size() + } + + #[getter] + #[inline] + fn getsizeof(&self, py: pyo3::Python) -> Option { + let inner = self.0.get(); + inner.shared().getsizeof().clone_ref(py).into() + } + + /// Returns the number of elements the map can hold without reallocating. + #[inline] + fn capacity(&self) -> usize { + let inner = self.0.get(); + let policy = inner.policy(); + + policy.table().capacity() + } + + /// Returns the number of entries currently in the cache. 
+ #[inline] + fn __len__(&self) -> usize { + let inner = self.0.get(); + let policy = inner.policy(); + + debug_assert!(policy.table().len() == policy.heap().len()); + policy.table().len() + } + + #[inline] + fn __sizeof__(&self) -> usize { + let inner = self.0.get(); + let policy = inner.policy(); + + let table_cap = policy.table().capacity() * 8; + let list_cap = policy.heap().len() * std::mem::size_of::(); + + table_cap + list_cap + } + + #[inline] + fn __bool__(&self) -> bool { + let inner = self.0.get(); + let policy = inner.policy(); + + !policy.table().is_empty() + } + + #[inline] + fn __contains__(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult { + self.contains(py, key) + } + + /// Returns `true` if the cache contains an entry for `key`. + #[inline] + fn contains(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + let inner = self.0.get(); + inner.contains(py, &key) + } + + /// Returns `True` if cache is empty. + #[inline] + fn is_empty(&self) -> bool { + let inner = self.0.get(); + let policy = inner.policy(); + + policy.table().is_empty() + } + + /// Returns `True` when the cumulative size has reached the maxsize limit. + #[inline] + fn is_full(&self) -> bool { + let inner = self.0.get(); + let shared = inner.shared(); + let policy = inner.policy(); + + policy.current_size() >= shared.maxsize() + } + + /// Equals to `self[key] = value`, but returns a value: + /// + /// - If the cache did not have this key present, None is returned. + /// - If the cache did have this key present, the value is updated, + /// and the old value is returned. The key is not updated, though. 
+ fn insert( + &self, + py: pyo3::Python, + key: alias::PyObject, + value: alias::PyObject, + ) -> pyo3::PyResult> { + let inner = self.0.get(); + let handle = + lfupolicy::FrequencyHandle::new(py, inner.shared().getsizeof(), key, value, 1)?; + + let old_handle = inner.insert(py, handle)?.map(|x| x.into_value()); + Ok(old_handle) + } + + /// Updates the cache with elements from a dictionary or an iterable object of key/value pairs. + fn update( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + iterable: alias::PyObject, + ) -> pyo3::PyResult<()> { + if std::ptr::eq(slf.as_ptr(), iterable.as_ptr()) { + return Ok(()); + } + + let inner = slf.0.get(); + let getsizeof = inner.shared().getsizeof().clone_ref(py); + + inner.extend( + // iterable object + iterable.into_bound(py), + // transform function + move |key, value| lfupolicy::FrequencyHandle::new(py, &getsizeof, key, value, 1), + ) + } + + #[inline] + fn __setitem__( + &self, + py: pyo3::Python, + key: alias::PyObject, + value: alias::PyObject, + ) -> pyo3::PyResult<()> { + self.insert(py, key, value)?; + Ok(()) + } + + /// Retrieves the value for a given key from the cache. + /// + /// Returns the value associated with the key if present, otherwise returns the specified default value. + /// Equivalent to `self[key]`, but provides a fallback default if the key is not found. + /// + /// Args: + /// key: The key to look up in the cache. + /// default: The value to return if the key is not present in the cache. Defaults to None. + /// + /// Returns: + /// The value associated with the key, or the default value if the key is not found. + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn get<'p>( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument<'p>, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let mut policy = inner.policy(); + + if let Some(x) = policy.get(py, &key)? 
{ + return Ok(x.value().clone_ref(py)); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok(x.unbind()), + utils::OptionalArgument::Undefined => unsafe { + // SAFETY: None is immortal, so reference counting has no meaning + Ok(pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind()) + }, + } + } + + fn __getitem__( + &self, + py: pyo3::Python, + key: alias::PyObject, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let mut policy = inner.policy(); + + match policy.get(py, &key)? { + Some(x) => Ok(x.value().clone_ref(py)), + None => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + /// Inserts key with a value of default if key is not in the cache. + /// + /// Returns the value for key if key is in the cache, else default. + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn setdefault( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult { + // 1. Try to get value + // 2. If exists -> return it + // 3. Else -> insert default -> return default + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let shared = inner.shared(); + let mut policy = inner.policy(); + + if let Some(x) = policy.get(py, &key)? { + return Ok(x.value().clone_ref(py)); + } + drop(policy); + + let default_object = match default { + utils::OptionalArgument::Defined(x) => x.unbind(), + utils::OptionalArgument::Undefined => unsafe { + // SAFETY: None is immortal, so reference counting has no meaning + pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind() + }, + }; + + let handle = lfupolicy::FrequencyHandle::with_precomputed_hash_key( + py, + shared.getsizeof(), + key, + default_object.clone_ref(py), + 1, + )?; + + inner.insert(py, handle)?; + Ok(default_object) + } + + /// Removes specified key and returns the corresponding value. 
+ /// + /// If the key is not found, returns the `default` if given; otherwise, raise a KeyError. + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn pop( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + + if let Some(x) = inner.remove(py, &key)? { + return Ok(x.into_value()); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok(x.unbind()), + utils::OptionalArgument::Undefined => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + fn __delitem__(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult<()> { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + match inner.remove(py, &key)? { + Some(_) => Ok(()), + None => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + /// Remove and return a (key, value) pair as a 2-tuple. + fn popitem(&self, py: pyo3::Python) -> pyo3::PyResult<(alias::PyObject, alias::PyObject)> { + let inner = self.0.get(); + let mut policy = inner.policy(); + + let handle = policy.evict(py, inner.shared())?; + drop(policy); + + let (key, val) = handle.into_pair(); + Ok((key.into(), val)) + } + + /// Calls the `popitem()` `n` times and returns count of removed items. + #[inline] + fn drain( + &self, + py: pyo3::Python, + n: pyo3::ffi::Py_ssize_t, + ) -> pyo3::PyResult { + let inner = self.0.get(); + inner.drain(py, n) + } + /// Shrinks the internal allocation as close to the current length as possible. + #[inline] + fn shrink_to_fit(&self) { + let inner = self.0.get(); + let mut policy = inner.policy(); + policy.shrink_to_fit(inner.shared()); + } + + /// Removes all entries from the table and resets the cumulative size to zero. 
+ #[pyo3(signature=(*, reuse=false))] + fn clear(&self, reuse: bool) { + let inner = self.0.get(); + let shared = inner.shared(); + let mut policy = inner.policy(); + + policy.clear(shared); + + if !reuse { + policy.shrink_to_fit(shared); + } + } + + fn __eq__( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + other: pyo3::PyRef<'_, Self>, + ) -> pyo3::PyResult { + if std::ptr::eq(slf.as_ptr(), other.as_ptr()) { + return Ok(true); + } + + let self_inner = slf.0.get(); + let other_inner = other.0.get(); + + let self_policy = self_inner.policy(); + let other_policy = other_inner.policy(); + + self_policy.py_eq( + py, + self_inner.shared(), + &*other_policy, + other_inner.shared(), + ) + } + + fn __ne__( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + other: pyo3::PyRef<'_, Self>, + ) -> pyo3::PyResult { + if std::ptr::eq(slf.as_ptr(), other.as_ptr()) { + return Ok(false); + } + + let self_inner = slf.0.get(); + let other_inner = other.0.get(); + + let self_policy = self_inner.policy(); + let other_policy = other_inner.policy(); + + self_policy + .py_eq( + py, + self_inner.shared(), + &*other_policy, + other_inner.shared(), + ) + .map(|x| !x) + } + + fn items(&self, py: pyo3::Python) -> pyo3::PyResult> { + let inner = self.0.get(); + + let mut policy = inner.policy(); + let heap_mut = policy.heap_mut(); + + // TODO: test this edge case + // We don't want to interrupt other iterators for no reason, + // so we call sort_by manually and only interrupt them when it reports a change. 
+ if heap_mut.sort_by(|x, y| x.frequency().cmp(&y.frequency())) { + inner.shared().generation_version().increment(); + } + + let gv = inner.shared().generation_version().clone(); + let initial_gv = gv.get(); + + let result = PyLFUCacheItems { + iter: parking_lot::Mutex::new(heap_mut.iter(|x, y| x.frequency().cmp(&y.frequency()))), + gv, + initial_gv, + }; + pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + } + + fn values(&self, py: pyo3::Python) -> pyo3::PyResult> { + let inner = self.0.get(); + + let mut policy = inner.policy(); + let heap_mut = policy.heap_mut(); + + // TODO: test this edge case + // We don't want to interrupt other iterators for no reason, + // so we call sort_by manually and only interrupt them when it reports a change. + if heap_mut.sort_by(|x, y| x.frequency().cmp(&y.frequency())) { + inner.shared().generation_version().increment(); + } + + let gv = inner.shared().generation_version().clone(); + let initial_gv = gv.get(); + + // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] + let result = PyLFUCacheValues { + iter: parking_lot::Mutex::new(heap_mut.iter(|x, y| x.frequency().cmp(&y.frequency()))), + gv, + initial_gv, + }; + pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + } + + fn keys(&self, py: pyo3::Python) -> pyo3::PyResult> { + let inner = self.0.get(); + + let mut policy = inner.policy(); + let heap_mut = policy.heap_mut(); + + // TODO: test this edge case + // We don't want to interrupt other iterators for no reason, + // so we call sort_by manually and only interrupt them when it reports a change. 
+ if heap_mut.sort_by(|x, y| x.frequency().cmp(&y.frequency())) { + inner.shared().generation_version().increment(); + } + + let gv = inner.shared().generation_version().clone(); + let initial_gv = gv.get(); + + // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] + let result = PyLFUCacheKeys { + iter: parking_lot::Mutex::new(heap_mut.iter(|x, y| x.frequency().cmp(&y.frequency()))), + gv, + initial_gv, + }; + pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + } + + #[inline] + fn __iter__(&self, py: pyo3::Python) -> pyo3::PyResult> { + self.keys(py) + } + + // TODO: support items_with_frequency + + fn copy(&self, py: pyo3::Python) -> pyo3::PyResult> { + let inner = self.0.get(); + let cloned = inner.clone_ref(py); + let result = Self(onceinit::OnceInit::new(cloned)); + + pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseCacheImpl)) + } + + #[inline] + fn __copy__(&self, py: pyo3::Python) -> pyo3::PyResult> { + self.copy(py) + } + + fn __repr__(slf: pyo3::PyRef<'_, Self>, py: pyo3::Python) -> String { + let inner = slf.0.get(); + let shared = inner.shared(); + let policy = inner.policy(); + + // We cannot use heap.iter here, because it requires re-sorting + // and that could interrupt active iterators. 
+ let iter = unsafe { + policy + .table() + .iter() + .map(|bucket| bucket.as_ref()) + .map(|cursor| { + let handle = cursor.element(); + ( + // Without `.bind` it returns something like `Py(addr)` + handle.key().as_ref().bind(py), + handle.value().bind(py), + ) + }) + }; + + let items = utils::items_to_str(iter, policy.table().len()).unwrap(); + format!( + "{}[{}/{}]({})", + unsafe { utils::get_type_name(py, slf.as_ptr()) }, + policy.current_size(), + shared.maxsize(), + items + ) + } + + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn peek<'p>( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument<'p>, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let policy = inner.policy(); + + if let Some(x) = policy.peek(py, &key)? { + return Ok(x.value().clone_ref(py)); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok(x.unbind()), + utils::OptionalArgument::Undefined => unsafe { + // SAFETY: None is immortal, so reference counting has no meaning + Ok(pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind()) + }, + } + } + + #[pyo3(signature = (n=0))] + fn least_frequently_used( + &self, + py: pyo3::Python, + mut n: pyo3::ffi::Py_ssize_t, + ) -> pyo3::PyResult { + let inner = self.0.get(); + let mut policy = inner.policy(); + + if n < 0 { + n += policy.table().len() as isize; + } + if n < 0 { + return Err(new_py_error!(PyIndexError, "`n` out of range")); + } + + let heap_mut = policy.heap_mut(); + + if heap_mut.sort_by(|x, y| x.frequency().cmp(&y.frequency())) { + inner.shared().generation_version().increment(); + } + + match heap_mut.get(n as usize) { + Some(handle) => unsafe { + let element = handle.element(); + Ok(element.key().as_ref().clone_ref(py)) + }, + None => Err(new_py_error!(PyIndexError, "`n` out of range")), + } + } + + fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { + 
let inner = self.0.get(); + let policy = inner.policy(); + + for cursor in unsafe { policy.table().iter() } { + let handle = unsafe { cursor.as_ref().element() }; + + visit.call(handle.key().as_ref())?; + visit.call(handle.value())?; + } + Ok(()) + } + + fn __clear__(&self) { + let inner = self.0.get(); + let mut policy = inner.policy(); + policy.clear(inner.shared()); + } +} + +// Implement iterators +macro_rules! implement_iterator { + ( + $( + $name:ident as $pyname:literal + fn ($py:ident, $handle:ident) -> $rt_type:ty { $init:expr } + )+ + ) => { + $( + implement_pyclass! { + [extends=crate::pyclasses::base::PyBaseIteratorImpl, generic, frozen] + $name as $pyname { + initial_gv: u32, + gv: utils::GenerationVersion, + iter: parking_lot::Mutex>, + } + } + + #[pyo3::pymethods] + impl $name { + #[inline] + fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { + slf + } + + fn __next__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult<$rt_type> { + if slf.initial_gv != slf.gv.get() { + return Err(new_py_error!( + PyRuntimeError, + "cache size changed during iteration" + )); + } + + let mut iter = slf.iter.lock(); + + match iter.next() { + Some(x) => { + let $py = slf.py(); + let $handle = unsafe { x.element() }; + Ok($init) + } + None => return Err(new_py_error!(PyStopIteration, ())), + } + } + } + )+ + }; +} +implement_iterator!( + PyLFUCacheItems as "lfucache_items" + fn(py, handle) -> (alias::PyObject, alias::PyObject) {{ + let (key, val) = handle.clone_ref(py).into_pair(); + (key.into(), val) + }} + + PyLFUCacheKeys as "lfucache_keys" + fn(py, handle) -> alias::PyObject { handle.key().clone_ref(py).into() } + + PyLFUCacheValues as "lfucache_values" + fn(py, handle) -> alias::PyObject { handle.value().clone_ref(py) } +); diff --git a/src/pyclasses/lrucache.rs b/src/pyclasses/lrucache.rs index b10be0c..a0b2ec0 100644 --- a/src/pyclasses/lrucache.rs +++ b/src/pyclasses/lrucache.rs @@ -96,7 +96,7 @@ impl PyLRUCache { ) } - /// Initialize a new `FIFOCache` 
instance. + /// Initialize a new `LRUCache` instance. /// /// Args: /// maxsize: Maximum number of elements the cache can hold. @@ -181,7 +181,7 @@ impl PyLRUCache { let inner = self.0.get(); let policy = inner.policy(); - debug_assert!(policy.table().len() == policy.linked_list().len()); + debug_assert!(policy.table().len() == policy.list().len()); policy.table().len() } @@ -191,7 +191,7 @@ impl PyLRUCache { let policy = inner.policy(); let table_cap = policy.table().capacity() * 8; - let list_cap = policy.linked_list().len() * std::mem::size_of::(); + let list_cap = policy.list().len() * std::mem::size_of::(); table_cap + list_cap } @@ -524,7 +524,7 @@ impl PyLRUCache { // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] let result = PyLRUCacheItems { - iter: parking_lot::Mutex::new(unsafe { inner.policy().linked_list().iter() }), + iter: parking_lot::Mutex::new(unsafe { inner.policy().list().iter() }), gv, initial_gv, }; @@ -538,7 +538,7 @@ impl PyLRUCache { // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] let result = PyLRUCacheValues { - iter: parking_lot::Mutex::new(unsafe { inner.policy().linked_list().iter() }), + iter: parking_lot::Mutex::new(unsafe { inner.policy().list().iter() }), gv, initial_gv, }; @@ -552,7 +552,7 @@ impl PyLRUCache { // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] let result = PyLRUCacheKeys { - iter: parking_lot::Mutex::new(unsafe { inner.policy().linked_list().iter() }), + iter: parking_lot::Mutex::new(unsafe { inner.policy().list().iter() }), gv, initial_gv, }; @@ -583,7 +583,7 @@ impl PyLRUCache { let policy = inner.policy(); let iter = unsafe { - policy.linked_list().iter().map(|cursor| { + policy.list().iter().map(|cursor| { let handle = cursor.element(); ( // Without `.bind` it returns something like `Py(addr)` @@ -633,7 +633,7 @@ impl PyLRUCache { let inner = self.0.get(); let policy = 
inner.policy(); - match policy.linked_list().cursor_front() { + match policy.list().cursor_front() { Some(cursor) => Ok(unsafe { cursor.element().key().clone_ref(py).into() }), None => Err(new_py_error!(PyKeyError, "cache is empty")), } @@ -644,7 +644,7 @@ impl PyLRUCache { let inner = self.0.get(); let policy = inner.policy(); - match policy.linked_list().cursor_back() { + match policy.list().cursor_back() { Some(cursor) => Ok(unsafe { cursor.element().key().clone_ref(py).into() }), None => Err(new_py_error!(PyKeyError, "cache is empty")), } @@ -654,7 +654,7 @@ impl PyLRUCache { let inner = self.0.get(); let policy = inner.policy(); - for cursor in unsafe { policy.linked_list().iter() } { + for cursor in unsafe { policy.list().iter() } { let handle = unsafe { cursor.element() }; visit.call(handle.key().as_ref())?; diff --git a/src/pyclasses/mod.rs b/src/pyclasses/mod.rs index 3d16ad8..fa1eeea 100644 --- a/src/pyclasses/mod.rs +++ b/src/pyclasses/mod.rs @@ -1,5 +1,6 @@ pub mod base; pub mod cache; pub mod fifocache; +pub mod lfucache; pub mod lrucache; pub mod rrcache; diff --git a/tests/test_impls.py b/tests/test_impls.py index d0893e0..521dc51 100644 --- a/tests/test_impls.py +++ b/tests/test_impls.py @@ -636,3 +636,34 @@ def test_peek_no_promote_key(self): assert c.least_recently_used() == "a" assert c.most_recently_used() == "c" + + +class TestLFUCache( + mixins.InitializeMixin, + mixins.InsertAndGetMixin, + mixins.PopitemMixin, + mixins.SetDefaultMixin, + mixins.PopAndDeleteMixin, + mixins.UpdateMixin, + mixins.IntrospectionMixin, + mixins.IterationMixin, + mixins.DrainClearShrinkMixin, + mixins.CopyMixin, + mixins.GetSizeOfMixin, + mixins.EdgeCasesMixin, + mixins.IssuesMixin, + mixins.FuzzyMixin, +): + def create_cache( + self, + maxsize: int = 10, + iterable: typing.Any = None, + capacity: int = 0, + getsizeof: typing.Any = None, + ) -> cachebox.LFUCache: + return cachebox.LFUCache( + maxsize, + iterable, + capacity=capacity, + getsizeof=getsizeof, + ) From 
1b69b8fb903a45b84c7fdfc1afc056718e8dedfa Mon Sep 17 00:00:00 2001 From: awolverp Date: Sun, 24 May 2026 15:07:38 +0330 Subject: [PATCH 16/60] Improve LFU tests --- cachebox/_core.pyi | 2 +- tests/test_impls.py | 222 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 223 insertions(+), 1 deletion(-) diff --git a/cachebox/_core.pyi b/cachebox/_core.pyi index 8b8376c..b0fb326 100644 --- a/cachebox/_core.pyi +++ b/cachebox/_core.pyi @@ -821,7 +821,7 @@ class LFUCache(BaseCacheImpl[KT, VT]): def popitem(self) -> typing.Tuple[KT, VT]: """ Removes the least recently used item from the cache and returns it as a (key, value) tuple. - Raises KeyError if the cache is empty. + Raises `KeyError` if the cache is empty. """ ... diff --git a/tests/test_impls.py b/tests/test_impls.py index 521dc51..320a005 100644 --- a/tests/test_impls.py +++ b/tests/test_impls.py @@ -667,3 +667,225 @@ def create_cache( capacity=capacity, getsizeof=getsizeof, ) + + @staticmethod + def _hit(cache: cachebox.LFUCache, key, times: int = 1) -> None: + """Access a key `times` times to accumulate frequency.""" + for _ in range(times): + cache[key] + + def test_evicts_least_frequent_on_insert(self): + c = self.create_cache(3) + c["a"] = 1 + c["b"] = 2 + c["c"] = 3 + self._hit(c, "a", 5) + self._hit(c, "b", 3) + # "c" has frequency 1 — should be evicted + c["d"] = 4 + assert "c" not in c + assert "a" in c + assert "b" in c + assert "d" in c + + def test_evicts_lowest_frequency_not_oldest(self): + """LFU must evict by count, not by insertion order.""" + c = self.create_cache(3) + c["old"] = 0 # inserted first + c["mid"] = 0 + c["new"] = 0 # inserted last + self._hit(c, "old", 10) + self._hit(c, "mid", 10) + # "new" has lowest frequency even though "old" is oldest + c["x"] = 99 + assert "new" not in c + assert "old" in c + assert "mid" in c + + def test_frequency_survives_value_update(self): + """Re-inserting a key should update value but preserve (and increment) frequency.""" + c = 
self.create_cache(2) + c["a"] = 1 + c["b"] = 1 + self._hit(c, "a", 5) # a.freq = 6 (5 reads + 1 insert) + c["a"] = 99 # update — should NOT reset frequency to 1 + # b has freq=1, a has freq>=6; inserting "c" must evict "b" + c["c"] = 3 + assert "b" not in c + assert "a" in c + + def test_popitem_removes_lfu_item(self): + c = self.create_cache(3) + c["a"] = 1 + c["b"] = 2 + c["c"] = 3 + self._hit(c, "a", 5) + self._hit(c, "b", 2) + # c has lowest frequency + key, val = c.popitem() + assert key == "c" + assert val == 3 + assert "c" not in c + + def test_tie_broken_by_recency_oldest_evicted(self): + """When frequencies are equal, the oldest-inserted key is evicted.""" + c = self.create_cache(3) + c["first"] = 1 # inserted first → evicted on tie + c["second"] = 2 + c["third"] = 3 + # All have freq=1; "first" is oldest + c["fourth"] = 4 + assert "first" not in c + + def test_single_item_cache_evicts_on_second_insert(self): + c = self.create_cache(1) + c["only"] = 42 + self._hit(c, "only", 100) + c["new"] = 7 + assert "only" not in c + assert c["new"] == 7 + + def test_get_increments_frequency(self): + c = self.create_cache(2) + c["a"] = 1 + c["b"] = 2 + self._hit(c, "a", 3) # a.freq = 4, b.freq = 1 + c["c"] = 3 # evicts b + assert "b" not in c + assert "a" in c + + def test_setdefault_increments_frequency_on_hit(self): + c = self.create_cache(2) + c["a"] = 1 + c["b"] = 2 + # setdefault on existing key should count as an access + for _ in range(5): + c.setdefault("a", 999) + c["c"] = 3 # should evict "b", not "a" + assert "b" not in c + assert "a" in c + + def test_peek_does_not_increment_frequency(self): + c = self.create_cache(2) + c["a"] = 1 + c["b"] = 2 + + # Peek "a" many times — frequency must NOT change + for _ in range(100): + c.peek("a") + + # hit b once so it has freq=2 vs a's freq=1 + self._hit(c, "b", 1) + c["c"] = 3 # must evict "a" (lower freq due to peek not counting) + assert "a" not in c + assert "b" in c + + def 
test_least_frequently_used_reflects_access_counts(self): + c = self.create_cache(4) + c["a"] = 1 + c["b"] = 2 + c["c"] = 3 + c["d"] = 4 + self._hit(c, "a", 10) + self._hit(c, "b", 5) + self._hit(c, "c", 2) + # d has freq=1, c has freq=3, b has freq=6, a has freq=11 + assert c.least_frequently_used(0) == "d" + assert c.least_frequently_used(1) == "c" + assert c.least_frequently_used(2) == "b" + assert c.least_frequently_used(3) == "a" + + def test_frequency_not_reset_after_pop_and_reinsert(self): + """A key that is popped and re-added starts fresh at frequency 1.""" + c = self.create_cache(2) + c["a"] = 1 + c["b"] = 2 + self._hit(c, "a", 10) + c.pop("a") + c["a"] = 1 # fresh insert — freq resets to 1 + # now b also has freq=1; tie broken by insertion order — a is newer + c["c"] = 3 # should evict b (older with same freq=1) + assert "b" not in c + assert "a" in c + + def test_cache_never_exceeds_maxsize(self): + c = self.create_cache(5) + for i in range(20): + c[i] = i + assert len(c) <= 5 + + def test_update_triggers_eviction(self): + c = self.create_cache(3) + c["a"] = 1 + c["b"] = 2 + c["c"] = 3 + self._hit(c, "a", 5) + self._hit(c, "b", 3) + c.update({"d": 4, "e": 5}) + assert len(c) == 3 + + def test_drain_removes_lfu_items_in_order(self): + c = self.create_cache(4) + c["a"] = 1 + c["b"] = 2 + c["c"] = 3 + c["d"] = 4 + self._hit(c, "d", 10) + self._hit(c, "c", 5) + self._hit(c, "b", 2) + # a has freq=1 → evicted first; b next; etc. 
+ removed = c.drain(2) + assert removed == 2 + assert "a" not in c + assert "b" not in c + assert "c" in c + assert "d" in c + + def test_single_entry_popitem(self): + c = self.create_cache(10) + c["solo"] = 99 + k, v = c.popitem() + assert k == "solo" and v == 99 + assert len(c) == 0 + + def test_popitem_empty_raises(self): + c = self.create_cache(5) + with pytest.raises(KeyError): + c.popitem() + + def test_least_frequently_used_empty_raises(self): + c = self.create_cache(5) + with pytest.raises(IndexError): + c.least_frequently_used() + + def test_least_frequently_used_out_of_range_raises(self): + c = self.create_cache(5) + c["a"] = 1 + with pytest.raises(IndexError): + c.least_frequently_used(5) + + def test_clear_resets_all_frequencies(self): + c = self.create_cache(3) + c["a"] = 1 + self._hit(c, "a", 50) + c.clear() + assert len(c) == 0 + # After clearing, re-inserted keys start at frequency 1 + c["a"] = 1 + c["b"] = 2 + c["c"] = 3 + # All freq=1; tie → oldest ("a") evicted + c["d"] = 4 + assert "a" not in c + + def test_insert_returns_none_for_new_key(self): + c = self.create_cache(5) + result = c.insert("x", 42) + assert result is None + + def test_insert_returns_old_value_for_existing_key(self): + c = self.create_cache(5) + c["x"] = 1 + old = c.insert("x", 99) + assert old == 1 + assert c["x"] == 99 From 8b2c9ef665c4cabe756093dff0856e33a5a87e6b Mon Sep 17 00:00:00 2001 From: awolverp Date: Mon, 25 May 2026 15:03:26 +0330 Subject: [PATCH 17/60] Refactor TTLCache & fix some bugs - TTLCache refactored - Some edge case bugs fixed - Tests improved --- Cargo.lock | 233 ++++++++++ Cargo.toml | 3 +- Makefile | 49 -- cachebox/__init__.py | 1 + cachebox/_core.pyi | 234 ++++++++++ src/internal/utils.rs | 26 +- src/lib.rs | 9 + src/policies/common.rs | 22 + src/policies/fifopolicy.rs | 81 ++-- src/policies/lfupolicy.rs | 29 +- src/policies/lrupolicy.rs | 29 +- src/policies/mod.rs | 1 + src/policies/nopolicy.rs | 29 +- src/policies/rrpolicy.rs | 29 +- 
src/policies/traits.rs | 47 +- src/policies/ttlpolicy.rs | 557 +++++++++++++++++++++++ src/policies/wrapped.rs | 33 +- src/pyclasses/cache.rs | 10 +- src/pyclasses/fifocache.rs | 8 +- src/pyclasses/lfucache.rs | 16 +- src/pyclasses/lrucache.rs | 16 +- src/pyclasses/mod.rs | 1 + src/pyclasses/rrcache.rs | 10 +- src/pyclasses/ttlcache.rs | 897 +++++++++++++++++++++++++++++++++++++ tests/mixins.py | 71 +++ tests/test_impls.py | 413 ++++++++++++++++- 26 files changed, 2585 insertions(+), 269 deletions(-) delete mode 100644 Makefile create mode 100644 src/policies/ttlpolicy.rs create mode 100644 src/pyclasses/ttlcache.rs diff --git a/Cargo.lock b/Cargo.lock index 7832ec4..1350f8c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,17 +2,39 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "autocfg" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2032f911046de80f0a198e0901378627c33f59ea0ac00e363d481118bd70a53" + [[package]] name = "bitflags" version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" +[[package]] +name = "bumpalo" +version = "3.20.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72f5acc6cb2ba439de613abc23857ec3d78374d8ed5ac84e9d11336e87da8649" + [[package]] name = "cachebox" version = "6.0.0" dependencies = [ "cfg-if", + "chrono", "fastrand", "parking_lot", "pyo3", @@ -35,6 +57,25 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" +[[package]] +name = "chrono" +version = "0.4.44" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" +dependencies = [ + "iana-time-zone", + "js-sys", + "num-traits", + "wasm-bindgen", + "windows-link", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + [[package]] name = "fastrand" version = "2.4.1" @@ -47,12 +88,72 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" +[[package]] +name = "futures-core" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" + +[[package]] +name = "futures-task" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" + +[[package]] +name = "futures-util" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" +dependencies = [ + "futures-core", + "futures-task", + "pin-project-lite", + "slab", +] + [[package]] name = "heck" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "iana-time-zone" +version = "0.1.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "js-sys" +version = "0.3.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "142bc4740e452c1e57ade0cbc129f139c9093e354346f0872ef985f4f5cf5f11" +dependencies = [ + "cfg-if", + "futures-util", + "once_cell", + "wasm-bindgen", +] + [[package]] name = "libc" version = "0.2.186" @@ -68,6 +169,21 @@ dependencies = [ "scopeguard", ] +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + [[package]] name = "once_cell" version = "1.21.4" @@ -97,6 +213,12 @@ dependencies = [ "windows-link", ] +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + [[package]] name = "portable-atomic" version = "1.13.1" @@ -118,6 +240,7 @@ version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "91fd8e38a3b50ed1167fb981cd6fd60147e091784c427b8f7183a7ee32c31c12" dependencies = [ + "chrono", "libc", "once_cell", "portable-atomic", @@ -198,6 +321,12 @@ dependencies = [ "bitflags", ] +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + [[package]] name = "scopeguard" version = "1.2.0" @@ -210,6 +339,12 @@ version = "1.3.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + [[package]] name = "smallvec" version = "1.15.1" @@ -239,8 +374,106 @@ version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" +[[package]] +name = "wasm-bindgen" +version = "0.2.122" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ed04576f974d2b2fba0f38c51dbc5518011e38c36bf1143164be765528fd409" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.122" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "916151b09da36bd82f6615cbf3a419e2f0ba23a03c6160e8e92eb6bd4aa1dec6" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.122" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "299047362ccbfce148b67ab7e73349f77748e00c8296f9542adfad2ad82c5c5e" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.122" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a929b2c61f11ba3e9bc35b50c1f25cb38e0e892c0c231ae2b8cf78d5dad4437" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + 
"windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "windows-link" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link", +] diff --git a/Cargo.toml b/Cargo.toml index 44a90a7..e678008 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,9 +23,10 @@ strip = true [dependencies] cfg-if = "1.0.4" +chrono = "0.4.44" fastrand = "2.4.1" parking_lot = {version="0.12.5", default-features=false} -pyo3 = {version="0.28.3", default-features=false, features=["macros", "generate-import-lib"]} +pyo3 = {version="0.28.3", default-features=false, features=["macros", "generate-import-lib", "chrono"]} # tokio = {version="1.52.3", default-features=false, features=["sync"]} [build-dependencies] diff --git a/Makefile b/Makefile deleted file mode 100644 index 1b62907..0000000 --- a/Makefile +++ /dev/null @@ -1,49 +0,0 @@ -help: - @echo "Commands:" - @echo -e 
"\tbuild-dev build source" - @echo -e "\tbuild-prod build source (release mode)" - @echo -e "\ttest-rs clippy and test rust code" - @echo -e "\ttest-py build and test python code" - @echo -e "\tformat format rust and python code" - @echo -e "\tclean clean all the unneeded files" - -.PHONY: build-test -build-test: - maturin develop --features "small-offset" - -.PHONY: build-dev -build-dev: - maturin develop - -.PHONY: build-prod -build-prod: - maturin develop --release - -.PHONY: test-rs -test-rs: - cargo clippy - cargo test -- --nocapture - -.PHONY: test-py -test-py: build-dev - coverage run -m pytest -s -vv - -rm -rf .pytest_cache - -ruff check . - ruff clean - coverage html - -.PHONY: format -format: - ruff format --line-length=100 . - ruff clean - cargo fmt - -.PHONY: clean -clean: - -rm -rf `find . -name __pycache__` - -rm -rf python/cachebox/*.so - -rm -rf target/release - -rm -rf .pytest_cache - -rm -rf .coverage - -rm -rf htmlcov - -ruff clean diff --git a/cachebox/__init__.py b/cachebox/__init__.py index bcd3b11..94495af 100644 --- a/cachebox/__init__.py +++ b/cachebox/__init__.py @@ -4,6 +4,7 @@ from ._core import LFUCache as LFUCache from ._core import LRUCache as LRUCache from ._core import RRCache as RRCache +from ._core import TTLCache as TTLCache try: from ._core import ( diff --git a/cachebox/_core.pyi b/cachebox/_core.pyi index b0fb326..7c9129e 100644 --- a/cachebox/_core.pyi +++ b/cachebox/_core.pyi @@ -1,4 +1,5 @@ import typing +from datetime import timedelta from _typeshed import SupportsItems @@ -876,3 +877,236 @@ class LFUCache(BaseCacheImpl[KT, VT]): - Do not use this method while using iterators. """ ... + +class TTLCache(BaseCacheImpl[KT, VT]): + """ + A Time-To-Live (TTL) cache eviction policy: each entry carries an expiration timestamp + and is considered stale — and eligible for eviction — once that deadline has passed, + regardless of how recently or frequently it was accessed. 
+ + ## How It Works + The TTL algorithm pairs time-based expiration with insertion-order eviction. Every entry + is stamped with an absolute `expires_at` timestamp at insertion time (computed as + `now + global_ttl`). Entries are stored in insertion order, and eviction proceeds from the + front of that queue — but only after confirming the candidate has actually expired. A live + entry at the front of the queue blocks eviction of everything behind it, so the cache may + temporarily exceed capacity if the oldest entries are still fresh. + + Like `FIFOPolicy`, this implementation backs the queue with a `double-ended queue` for O(1) + front removal and a `hash map` for O(1) key lookups. The same logical-index trick applies: + the table stores monotonically increasing counters rather than physical deque positions, and + a `front_offset` counter converts a logical index back to a physical one at read time via + `entries[table[key] - front_offset]`. This keeps eviction and lookup O(1) without rewriting + the table on every eviction. On top of that, every read checks `expires_at` against the current wall-clock time and + treats any expired entry as a cache miss. + + Without `sweep_interval`, an expiry sweep is triggered automatically on every call to + `insert`, `update`, `current_size`, `remaining_size`, `last`, `first`, `items`, `keys`, + `values`, and `__iter__`. A completely idle cache will accumulate stale entries between + these calls, but any normal interaction with the cache is sufficient to reclaim them. + When `sweep_interval` is set, a background Rust thread performs the sweep on that interval + instead, reclaiming expired entries independent of any method calls. + + ### Pros + - Insert, lookup, and evict are all O(1) amortized: the `front_offset` trick eliminates the O(n) + index-shifting that a naïve implementation would require on every eviction. + - Entries expire automatically without any background thread or explicit invalidation call. 
+ Stale data is never returned to the caller. + - TTL expiry and insertion-order eviction compose cleanly: the oldest entry is always evicted + first among those that have already expired. + - A single `global_ttl` keeps configuration simple; every entry ages at the same rate. + + ### Cons + + - Wall-clock dependency. Correctness relies on a monotonically advancing system clock. + Clock adjustments (NTP steps, suspend/resume) can cause entries to expire earlier or later + than intended. + - When `sweep_interval` is set, a background Rust thread wakes on that interval to sweep and + remove all expired entries. This adds a small amount of background CPU usage and + introduces a reaper thread for the lifetime of the cache. + - No per-entry TTL override. All entries share `global_ttl`; mixed expiry requirements need + a different policy or a wrapper layer. + - The rare O(n) index rebase (triggered when `front_offset` nears `usize::MAX - isize::MAX`) + introduces an occasional latency spike. Amortized cost is negligible, but the worst-case + latency of a single call is O(n). + + ## When to use it + Reach for `TTLCache` when: + - Cached data has a natural freshness window: API responses, auth tokens, DNS records, + rate-limit counters, or any value that becomes incorrect or unsafe after a known interval. + - You need automatic expiry without a background reaper thread — expiry sweeps on common + method calls are sufficient, or you want continuous reclamation via `sweep_interval`. + - Access patterns are unpredictable or uniform enough that recency- or frequency-based + eviction (LRU/LFU) would offer no meaningful advantage. + + Avoid it when: + - Your workload has strong temporal locality and you need a best-effort hit rate policy — + LRU will serve you better. + - Per-entry TTL granularity is required. If different keys need different lifetimes, + consider `VTTLCache`. 
+ - Your environment has an unreliable or adjustable system clock, where wall-clock-based + expiry may behave unexpectedly. + """ + + def __init__( + self, + maxsize: int, + global_ttl: float | timedelta, + iterable: _IterableType[KT, VT] | None = None, + *, + capacity: int = 0, + getsizeof: typing.Callable[[KT, VT], int] | None = None, + sweep_interval: float | timedelta | None = None, + ) -> None: + """ + Initialize a new instance. + + Args: + maxsize: Maximum number of elements the cache can hold. If zero, the limit is set to sys.maxsize internally. + global_ttl: Time-to-live for every entry, either as seconds (float) or a timedelta. Applied at insertion time. + iterable: Initial data to populate the cache. + capacity: Pre-allocate cache capacity to minimize reallocations. Defaults to 0. + getsizeof: A callable that computes the integer size of a key-value pair. When `None`, each + entry is assumed to have a size of 1 (equivalent to `lambda k, v: 1`). + Use this to implement weighted caching — for example, sizing entries by + memory footprint or byte length. + sweep_interval: If set, starts a background Rust thread that sweeps and removes all expired entries on this interval. + When None, expiry is lazy. Defaults to `None`. + + The cache can be pre-sized via `capacity` to reduce reallocations when + the number of expected entries is known ahead of time. + """ + ... + + @property + def global_ttl(self) -> float: + """Returns the specified `global_ttl`""" + ... + + def insert(self, key: KT, value: VT) -> typing.Optional[VT]: + """ + Equivalent to `self[key] = value`, but returns a value: + + - If the cache did not have this key present, None is returned. + - If the cache did have this key present, the value is updated, + and the old value is returned. The key is not updated, though. + + It's recommended to use this method instead of `self[key] = value`, as it keeps code + compatible across different cache policies. + """ + ...
+ + def update(self, iterable: _IterableType[KT, VT]) -> None: + """ + Updates the cache with elements from a dictionary or an iterable object of key/value pairs. + """ + ... + + def setdefault( + self, + key: KT, + default: typing.Optional[DT] = None, + ) -> typing.Optional[VT | DT]: + """ + Inserts key with a value of default if key is not in the cache. + + Returns the value for key if key is in the cache, else default. + """ + ... + + def popitem(self) -> typing.Tuple[KT, VT]: + """ + Removes the element that has been in the cache the longest. + """ + ... + + def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: + """ + Returns an iterable object of the cache's items (key-value pairs). + + Notes: + - You should not make any changes in cache while using this iterable object. + - Items are ordered. + """ + ... + + def keys(self) -> typing.Iterable[KT]: + """ + Returns an iterable object of the cache's keys. + + Notes: + - You should not make any changes in cache while using this iterable object. + - Keys are ordered. + """ + ... + + def values(self) -> typing.Iterable[VT]: + """ + Returns an iterable object of the cache's values. + + Notes: + - You should not make any changes in cache while using this iterable object. + - Values are ordered. + """ + ... + + def first(self, n: int = 0) -> typing.Optional[KT]: + """ + Returns the first key in cache; this is the one which will be removed by `popitem()` (if n == 0). + By using `n` parameter, you can browse order index by index. + + Raises `IndexError` if cache is empty, or `n` is out of range. + """ + ... + + def last(self) -> typing.Optional[KT]: + """ + Returns the last key in cache. Equals to `self.first(-1)`. + + Raises `IndexError` if cache is empty. + """ + ... + + def expire(self, *, reuse: bool = False) -> None: + """ + Manually removes expired key-value pairs from cache. + + If `reuse` is True, will not free the memory for reusing in the future. + """ + ... 
+ + def get_with_expire( + self, + key: KT, + default: typing.Optional[DT] = None, + ) -> typing.Tuple[typing.Union[VT, DT], float]: + """ + Works exactly like `.get()`, but also returns expiration duration for a given key from the cache (or 0.0 if not found). + """ + ... + + def pop_with_expire( + self, + key: KT, + default: typing.Optional[DT] = None, + ) -> typing.Tuple[typing.Union[VT, DT], float]: + """ + Works exactly like `.pop()`, but also returns expiration duration for a given key from the cache (or 0.0 if not found). + """ + ... + + def popitem_with_expire(self) -> typing.Tuple[KT, VT, float]: + """ + Works exactly like `.popitem()`, but also returns expiration duration for a given key from the cache. + """ + ... + + def items_with_expire(self) -> typing.Iterable[typing.Tuple[KT, VT, float]]: + """ + Returns an iterable object of the cache's items (key-value pairs) with their expiration duration. + + Notes: + - You should not make any changes in cache while using this iterable object. + - Items are ordered. + """ + ... diff --git a/src/internal/utils.rs b/src/internal/utils.rs index 74772a6..c55764a 100644 --- a/src/internal/utils.rs +++ b/src/internal/utils.rs @@ -206,19 +206,35 @@ pub unsafe fn get_type_name<'a>(py: pyo3::Python<'a>, obj: *mut pyo3::ffi::PyObj /// otherwise you will get [`OptionalArgument::Undefined`]. /// /// It can be used instead of [`Option`] to improve performance. -#[derive(Debug, Clone)] -pub enum OptionalArgument<'a> { +#[derive(Debug)] +pub enum OptionalArgument { /// The argument was not provided by the caller. Undefined, /// The argument was provided and holds the bound Python object.
- Defined(pyo3::Bound<'a, pyo3::PyAny>), + Defined(alias::PyObject), } -impl<'a, 'py> pyo3::FromPyObject<'a, 'py> for OptionalArgument<'py> { +impl<'a, 'py> pyo3::FromPyObject<'a, 'py> for OptionalArgument { type Error = pyo3::PyErr; fn extract(obj: pyo3::Borrowed<'a, 'py, pyo3::PyAny>) -> Result { - Ok(Self::Defined(obj.to_owned())) + Ok(Self::Defined(obj.to_owned().unbind())) + } +} + +#[derive(pyo3::FromPyObject)] +pub enum FloatOrTimedelta { + Float(f64), + Timedelta(chrono::Duration), +} + +impl From for f64 { + #[inline] + fn from(value: FloatOrTimedelta) -> Self { + match value { + FloatOrTimedelta::Float(x) => x, + FloatOrTimedelta::Timedelta(x) => x.as_seconds_f64(), + } } } diff --git a/src/lib.rs b/src/lib.rs index d50fc12..d068a58 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -71,6 +71,15 @@ mod _core { #[pymodule_export] use crate::pyclasses::lfucache::PyLFUCacheValues; + #[pymodule_export] + use crate::pyclasses::ttlcache::PyTTLCache; + #[pymodule_export] + use crate::pyclasses::ttlcache::PyTTLCacheItems; + #[pymodule_export] + use crate::pyclasses::ttlcache::PyTTLCacheKeys; + #[pymodule_export] + use crate::pyclasses::ttlcache::PyTTLCacheValues; + #[pymodule_init] pub fn init(m: &pyo3::Bound<'_, pyo3::types::PyModule>) -> pyo3::PyResult<()> { typeref::initialize_typeref(m.py()); diff --git a/src/policies/common.rs b/src/policies/common.rs index ecfc9a6..ecb04af 100644 --- a/src/policies/common.rs +++ b/src/policies/common.rs @@ -106,16 +106,32 @@ pub struct Shared { gv: utils::GenerationVersion, /// Callable used to measure size of each key-value pair. getsizeof: utils::GetsizeofFunction, + /// Global time-to-live for cache entries. This is for *TTL* implementations. + global_ttl: Option, } impl Shared { /// Creates a new [`Shared`]. #[inline] pub fn new(maxsize: usize, getsizeof: Option) -> Self { + unsafe { Self::with_ttl(maxsize, getsizeof, None) } + } + + /// Creates a new [`Shared`] with configured TTL. 
+ /// + /// # Safety + /// `ttl` should not be negative or zero. + #[inline] + pub unsafe fn with_ttl( + maxsize: usize, + getsizeof: Option, + ttl: Option, + ) -> Self { Self { maxsize: safe_non_zero!(maxsize), gv: utils::GenerationVersion::default(), getsizeof: utils::GetsizeofFunction::new(getsizeof), + global_ttl: ttl.map(std::time::Duration::from_secs_f64), } } } @@ -136,11 +152,17 @@ impl traits::SharedExt for Shared { &self.getsizeof } + #[inline] + fn global_ttl(&self) -> Option { + self.global_ttl + } + fn clone_ref(&self, py: pyo3::Python) -> Self { Self { maxsize: self.maxsize, gv: Default::default(), getsizeof: self.getsizeof.clone_ref(py), + global_ttl: self.global_ttl, } } } diff --git a/src/policies/fifopolicy.rs b/src/policies/fifopolicy.rs index e0f8436..bfd696c 100644 --- a/src/policies/fifopolicy.rs +++ b/src/policies/fifopolicy.rs @@ -10,6 +10,15 @@ use crate::policies::traits::SharedExt; pub use super::common::Handle; pub use super::common::Shared; +macro_rules! get_handle { + (&$slf:expr, $index:expr) => { + &$slf.entries[$index - $slf.front_offset] + }; + (&mut $slf:expr, $index:expr) => { + &mut $slf.entries[$index - $slf.front_offset] + }; +} + /// A view into an occupied entry in [`FIFOPolicy`]. pub struct Occupied<'a> { /// The parent storage that owns the hash table. 
@@ -20,36 +29,16 @@ pub struct Occupied<'a> { bucket: hashbrown::raw::Bucket, } -impl traits::EntryExt for Occupied<'_> { +impl traits::OccupiedExt for Occupied<'_> { type Handle = Handle; type Shared = Shared; - #[inline] - fn would_exceed(&self, extra_size: usize) -> bool { - let handle = - unsafe { &self.policy.entries[*self.bucket.as_ref() - self.policy.front_offset] }; - - self.policy - .currsize - .saturating_add(extra_size) - .saturating_sub(handle.size()) - > self.shared.maxsize() - } - - #[inline] - fn evict(&mut self, py: pyo3::Python) -> pyo3::PyResult { - self.policy.evict(py, self.shared) - } -} - -impl traits::OccupiedExt for Occupied<'_> { #[inline] fn replace(self, new: Self::Handle) -> Self::Handle { // In update we don't need to increment this; because this does not change the memory address ranges // self.shared.generation_version().increment(); - let index = unsafe { *self.bucket.as_ref() }; - let item = &mut self.policy.entries[index - self.policy.front_offset]; + let item = unsafe { get_handle!(&mut self.policy, *self.bucket.as_ref()) }; self.policy.currsize = self .policy @@ -62,6 +51,8 @@ impl traits::OccupiedExt for Occupied<'_> { #[inline] fn remove(self) -> Self::Handle { + self.shared.generation_version().increment(); + let (mut index, _) = unsafe { self.policy.table.remove(self.bucket) }; index -= self.policy.front_offset; @@ -82,7 +73,7 @@ pub struct Vacant<'a> { shared: &'a Shared, } -impl traits::EntryExt for Vacant<'_> { +impl traits::VacantExt for Vacant<'_> { type Handle = Handle; type Shared = Shared; @@ -92,12 +83,11 @@ impl traits::EntryExt for Vacant<'_> { } #[inline] - fn evict(&mut self, py: pyo3::Python) -> pyo3::PyResult { - self.policy.evict(py, self.shared) + fn evict(&mut self, py: pyo3::Python) -> pyo3::PyResult<()> { + self.policy.evict(py, self.shared)?; + Ok(()) } -} -impl traits::VacantExt for Vacant<'_> { fn insert(self, handle: Self::Handle) { self.shared.generation_version().increment(); @@ -106,11 +96,7 @@ impl 
traits::VacantExt for Vacant<'_> { self.policy.table.insert( handle.key().hash(), self.policy.entries.len() + self.policy.front_offset, - |index| { - self.policy.entries[(*index) - self.policy.front_offset] - .key() - .hash() - }, + |index| get_handle!(&self.policy, *index).key().hash(), ); self.policy.entries.push_back(handle); } @@ -265,13 +251,9 @@ impl PolicyExt for FIFOPolicy { py: pyo3::Python, key: &::Key, ) -> pyo3::PyResult> { - let eq = |index: &usize| { - self.entries[(*index) - self.front_offset] - .key() - .py_eq(py, key) - }; + let eq = |index: &usize| get_handle!(&self, *index).key().py_eq(py, key); match self.table.get(key.hash(), eq)? { - Some(index) => Ok(Some(&self.entries[(*index) - self.front_offset])), + Some(index) => Ok(Some(get_handle!(&self, *index))), None => Ok(None), } } @@ -282,11 +264,7 @@ impl PolicyExt for FIFOPolicy { key: &::Key, shared: &'a Self::Shared, ) -> pyo3::PyResult, Self::Vacant<'a>>> { - let eq = |index: &usize| { - self.entries[(*index) - self.front_offset] - .key() - .py_eq(py, key) - }; + let eq = |index: &usize| get_handle!(&self, *index).key().py_eq(py, key); match self.table.find(key.hash(), eq)? 
{ Some(bucket) => { let result = Occupied { @@ -337,9 +315,9 @@ impl PolicyExt for FIFOPolicy { fn shrink_to_fit(&mut self, shared: &Self::Shared) { shared.generation_version().increment(); - self.table.shrink_to(0, |index| { - self.entries[(*index) - self.front_offset].key().hash() - }); + self.table + .shrink_to(0, |index| get_handle!(&self, *index).key().hash()); + self.entries.shrink_to_fit(); } @@ -349,10 +327,11 @@ impl PolicyExt for FIFOPolicy { return; } + shared.generation_version().increment(); self.table.clear(); self.entries.clear(); - shared.generation_version().increment(); self.currsize = 0; + self.front_offset = 0; } fn py_eq( @@ -371,12 +350,10 @@ impl PolicyExt for FIFOPolicy { let mut iterator = self.table.iter().map(|x| x.as_ref()); iterator.all(|index_1| { - let handle_1 = &self.entries[(*index_1) - self.front_offset]; + let handle_1 = get_handle!(&self, *index_1); let result = other.table.get(handle_1.key().hash(), |index| { - handle_1 - .key() - .py_eq(py, other.entries[(*index) - other.front_offset].key()) + handle_1.key().py_eq(py, get_handle!(&other, *index).key()) }); match result { @@ -387,7 +364,7 @@ impl PolicyExt for FIFOPolicy { } Ok(None) => false, Ok(Some(index_2)) => { - let handle_2 = &other.entries[(*index_2) - other.front_offset]; + let handle_2 = get_handle!(&other, *index_2); let value_1 = handle_1.value(); let value_2 = handle_2.value(); diff --git a/src/policies/lfupolicy.rs b/src/policies/lfupolicy.rs index 47305e3..7a95a4d 100644 --- a/src/policies/lfupolicy.rs +++ b/src/policies/lfupolicy.rs @@ -135,28 +135,10 @@ pub struct Occupied<'a> { bucket: hashbrown::raw::Bucket>, } -impl traits::EntryExt for Occupied<'_> { +impl traits::OccupiedExt for Occupied<'_> { type Handle = FrequencyHandle; type Shared = Shared; - #[inline] - fn would_exceed(&self, extra_size: usize) -> bool { - let handle = unsafe { self.bucket.as_ref().element() }; - - self.policy - .currsize - .saturating_add(extra_size) - .saturating_sub(handle.size()) 
- > self.shared.maxsize() - } - - #[inline] - fn evict(&mut self, py: pyo3::Python) -> pyo3::PyResult { - self.policy.evict(py, self.shared) - } -} - -impl traits::OccupiedExt for Occupied<'_> { fn replace(self, new: Self::Handle) -> Self::Handle { // Here we don't need to increment generation version // self.shared.generation_version().increment(); @@ -202,7 +184,7 @@ pub struct Vacant<'a> { shared: &'a Shared, } -impl traits::EntryExt for Vacant<'_> { +impl traits::VacantExt for Vacant<'_> { type Handle = FrequencyHandle; type Shared = Shared; @@ -212,12 +194,11 @@ impl traits::EntryExt for Vacant<'_> { } #[inline] - fn evict(&mut self, py: pyo3::Python) -> pyo3::PyResult { - self.policy.evict(py, self.shared) + fn evict(&mut self, py: pyo3::Python) -> pyo3::PyResult<()> { + self.policy.evict(py, self.shared)?; + Ok(()) } -} -impl traits::VacantExt for Vacant<'_> { fn insert(self, handle: Self::Handle) { self.shared.generation_version().increment(); diff --git a/src/policies/lrupolicy.rs b/src/policies/lrupolicy.rs index def7d84..6f13243 100644 --- a/src/policies/lrupolicy.rs +++ b/src/policies/lrupolicy.rs @@ -19,28 +19,10 @@ pub struct Occupied<'a> { bucket: hashbrown::raw::Bucket>, } -impl traits::EntryExt for Occupied<'_> { +impl traits::OccupiedExt for Occupied<'_> { type Handle = Handle; type Shared = Shared; - #[inline] - fn would_exceed(&self, extra_size: usize) -> bool { - let handle = unsafe { self.bucket.as_ref().element() }; - - self.policy - .currsize - .saturating_add(extra_size) - .saturating_sub(handle.size()) - > self.shared.maxsize() - } - - #[inline] - fn evict(&mut self, py: pyo3::Python) -> pyo3::PyResult { - self.policy.evict(py, self.shared) - } -} - -impl traits::OccupiedExt for Occupied<'_> { fn replace(self, new: Self::Handle) -> Self::Handle { self.shared.generation_version().increment(); @@ -80,7 +62,7 @@ pub struct Vacant<'a> { shared: &'a Shared, } -impl traits::EntryExt for Vacant<'_> { +impl traits::VacantExt for Vacant<'_> { type 
Handle = Handle; type Shared = Shared; @@ -90,12 +72,11 @@ impl traits::EntryExt for Vacant<'_> { } #[inline] - fn evict(&mut self, py: pyo3::Python) -> pyo3::PyResult { - self.policy.evict(py, self.shared) + fn evict(&mut self, py: pyo3::Python) -> pyo3::PyResult<()> { + self.policy.evict(py, self.shared)?; + Ok(()) } -} -impl traits::VacantExt for Vacant<'_> { fn insert(self, handle: Self::Handle) { self.shared.generation_version().increment(); diff --git a/src/policies/mod.rs b/src/policies/mod.rs index 257fa96..aaf874c 100644 --- a/src/policies/mod.rs +++ b/src/policies/mod.rs @@ -6,5 +6,6 @@ pub mod lfupolicy; pub mod lrupolicy; pub mod nopolicy; pub mod rrpolicy; +pub mod ttlpolicy; pub mod wrapped; diff --git a/src/policies/nopolicy.rs b/src/policies/nopolicy.rs index 391cace..86babb3 100644 --- a/src/policies/nopolicy.rs +++ b/src/policies/nopolicy.rs @@ -18,28 +18,10 @@ pub struct Occupied<'a> { bucket: hashbrown::raw::Bucket, } -impl traits::EntryExt for Occupied<'_> { +impl traits::OccupiedExt for Occupied<'_> { type Shared = Shared; type Handle = Handle; - #[inline] - fn would_exceed(&self, extra_size: usize) -> bool { - let handle = unsafe { self.bucket.as_ref() }; - - self.policy - .currsize - .saturating_add(extra_size) - .saturating_sub(handle.size()) - > self.shared.maxsize() - } - - #[inline(always)] - fn evict(&mut self, py: pyo3::Python) -> pyo3::PyResult { - self.policy.evict(py, self.shared) - } -} - -impl traits::OccupiedExt for Occupied<'_> { fn remove(self) -> Self::Handle { self.shared.generation_version().increment(); @@ -68,7 +50,7 @@ pub struct Vacant<'a> { space_available: bool, } -impl traits::EntryExt for Vacant<'_> { +impl traits::VacantExt for Vacant<'_> { type Shared = Shared; type Handle = Handle; @@ -78,12 +60,11 @@ impl traits::EntryExt for Vacant<'_> { } #[inline(always)] - fn evict(&mut self, py: pyo3::Python) -> pyo3::PyResult { - self.policy.evict(py, self.shared) + fn evict(&mut self, py: pyo3::Python) -> 
pyo3::PyResult<()> { + self.policy.evict(py, self.shared)?; + Ok(()) } -} -impl traits::VacantExt for Vacant<'_> { fn insert(self, handle: Self::Handle) { self.shared.generation_version().increment(); self.policy.currsize = self.policy.currsize.saturating_add(handle.size()); diff --git a/src/policies/rrpolicy.rs b/src/policies/rrpolicy.rs index f1cef32..871d6dd 100644 --- a/src/policies/rrpolicy.rs +++ b/src/policies/rrpolicy.rs @@ -18,28 +18,10 @@ pub struct Occupied<'a> { bucket: hashbrown::raw::Bucket, } -impl traits::EntryExt for Occupied<'_> { +impl traits::OccupiedExt for Occupied<'_> { type Shared = Shared; type Handle = Handle; - #[inline] - fn would_exceed(&self, extra_size: usize) -> bool { - let handle = unsafe { self.bucket.as_ref() }; - - self.policy - .currsize - .saturating_add(extra_size) - .saturating_sub(handle.size()) - > self.shared.maxsize() - } - - #[inline(always)] - fn evict(&mut self, py: pyo3::Python) -> pyo3::PyResult { - self.policy.evict(py, self.shared) - } -} - -impl traits::OccupiedExt for Occupied<'_> { #[inline] fn remove(self) -> Self::Handle { self.shared.generation_version().increment(); @@ -70,7 +52,7 @@ pub struct Vacant<'a> { space_available: bool, } -impl traits::EntryExt for Vacant<'_> { +impl traits::VacantExt for Vacant<'_> { type Shared = Shared; type Handle = Handle; @@ -80,12 +62,11 @@ impl traits::EntryExt for Vacant<'_> { } #[inline(always)] - fn evict(&mut self, py: pyo3::Python) -> pyo3::PyResult { - self.policy.evict(py, self.shared) + fn evict(&mut self, py: pyo3::Python) -> pyo3::PyResult<()> { + self.policy.evict(py, self.shared)?; + Ok(()) } -} -impl traits::VacantExt for Vacant<'_> { fn insert(self, handle: Self::Handle) { self.shared.generation_version().increment(); self.policy.currsize = self.policy.currsize.saturating_add(handle.size()); diff --git a/src/policies/traits.rs b/src/policies/traits.rs index d3a5153..81274fa 100644 --- a/src/policies/traits.rs +++ b/src/policies/traits.rs @@ -13,41 +13,39 @@ 
pub trait HandleExt { fn size(&self) -> usize; } -/// Shared behaviour for occupied and vacant entry guards. -/// -/// Both variants hold a mutable borrow of the parent policy, so budget checks -/// and eviction go through the entry rather than through the policy directly. -pub trait EntryExt { +/// Guard for an *occupied* slot. +pub trait OccupiedExt { + type Shared: SharedExt; + type Handle: HandleExt; + + /// Replaces the current handle with `new`, returning the old one. + fn replace(self, new: Self::Handle) -> Self::Handle; + + /// Removes the handle from this slot and returns it. + fn remove(self) -> Self::Handle; +} + +/// Guard for a *vacant* slot. +pub trait VacantExt { type Shared: SharedExt; type Handle: HandleExt; /// Returns `true` if adding `extra_size` would meet or exceed [`SharedExt::maxsize`]. + /// Called *before* [`VacantExt::insert`]. /// - /// Call this *before* [`OccupiedExt::replace`] or [`VacantExt::insert`]. + /// This method exists here because, after calling [`PolicyExt::entry`], the policy + /// itself can no longer be used directly. fn would_exceed(&self, extra_size: usize) -> bool; /// Evicts one entry, freeing budget for a subsequent insert or replace. /// - /// # Errors + /// This method exists here because, after calling [`PolicyExt::entry`], the policy + /// itself can no longer be used directly. /// + /// # Errors /// Returns any Python exception raised while dropping the evicted value. - fn evict(&mut self, py: pyo3::Python) -> pyo3::PyResult; -} - -/// Guard for an *occupied* slot. -pub trait OccupiedExt: EntryExt { - /// Replaces the current handle with `new`, returning the old one. - /// - /// Does **not** enforce maxsize; call - /// [`would_exceed`](EntryExt::would_exceed) first. - fn replace(self, new: Self::Handle) -> Self::Handle; + fn evict(&mut self, py: pyo3::Python) -> pyo3::PyResult<()>; - /// Removes the handle from this slot and returns it. - fn remove(self) -> Self::Handle; -} - -/// Guard for a *vacant* slot.
-pub trait VacantExt: EntryExt { /// Inserts `handle` into this slot. /// /// Does **not** enforce the weight budget; call @@ -71,6 +69,9 @@ pub trait SharedExt: Send + Sync { /// Returns a reference to configued getsizeof function. fn getsizeof(&self) -> &utils::GetsizeofFunction; + /// Returns the configured global time-to-live, or `None` when no TTL was set. + fn global_ttl(&self) -> Option; + /// Make a clone of `self`. fn clone_ref(&self, py: pyo3::Python) -> Self; } diff --git a/src/policies/ttlpolicy.rs b/src/policies/ttlpolicy.rs new file mode 100644 index 0000000..34e4db5 --- /dev/null +++ b/src/policies/ttlpolicy.rs @@ -0,0 +1,557 @@ +use std::collections::VecDeque; + +use crate::hashbrown; +use crate::internal::alias; +use crate::internal::utils; +use crate::policies::traits; +use crate::policies::traits::HandleExt; +use crate::policies::traits::PolicyExt; +use crate::policies::traits::SharedExt; + +pub use crate::policies::common::Shared; + +macro_rules! get_handle { + (&$slf:expr, $index:expr) => { + &$slf.entries[$index - $slf.front_offset] + }; + (&mut $slf:expr, $index:expr) => { + &mut $slf.entries[$index - $slf.front_offset] + }; +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub enum ExpiresAt { + SystemTime(std::time::SystemTime), + Duration(std::time::Duration), +} + +impl From for ExpiresAt { + #[inline] + fn from(value: std::time::Duration) -> Self { + Self::Duration(value) + } +} + +impl From for std::time::SystemTime { + #[inline] + fn from(value: ExpiresAt) -> Self { + match value { + ExpiresAt::Duration(x) => std::time::SystemTime::now() + x, + ExpiresAt::SystemTime(x) => x, + } + } +} + +/// A key-value pair with a precomputed hash, a combined size, and an expiration time. +pub struct ExpiringHandle { + /// The cache key together with its precomputed hash, avoiding repeated + /// Python hash calls during table lookups. + key: utils::PrecomputedHashObject, + /// The cached value associated with this key.
+ value: alias::PyObject, + /// Size of the key and value as reported by `getsizeof`. + size: usize, + /// Absolute point in time at which this handle expires. + expires_at: std::time::SystemTime, +} + +impl ExpiringHandle { + /// Creates a new [`ExpiringHandle`], which calculates the precomputed hash itself. + #[inline] + pub fn new( + py: pyo3::Python<'_>, + getsizeof: &utils::GetsizeofFunction, + expires_at: ExpiresAt, + key: alias::PyObject, + value: alias::PyObject, + ) -> pyo3::PyResult { + Self::with_precomputed_hash_key( + py, + getsizeof, + expires_at, + utils::PrecomputedHashObject::new(py, key)?, + value, + ) + } + + /// Creates a new [`ExpiringHandle`] from an already-hashed key. + /// + /// Prefer this over [`ExpiringHandle::new`] when the caller has already paid the cost + /// of computing the hash (e.g. during a table lookup that preceded insertion). + #[inline] + pub fn with_precomputed_hash_key( + py: pyo3::Python<'_>, + getsizeof: &utils::GetsizeofFunction, + expires_at: ExpiresAt, + key: utils::PrecomputedHashObject, + value: alias::PyObject, + ) -> pyo3::PyResult { + let size = getsizeof.call(py, key.as_ref(), &value)?; + Ok(Self { + key, + value, + size, + expires_at: expires_at.into(), + }) + } + + /// Consumes `self` and returns the [`utils::PrecomputedHashObject`]. + #[inline] + pub fn into_key(self) -> utils::PrecomputedHashObject { + self.key + } + + /// Returns a reference to the value. + #[inline] + pub fn value(&self) -> &alias::PyObject { + &self.value + } + + /// Consumes `self` and returns the value of the pair. + #[inline] + pub fn into_value(self) -> alias::PyObject { + self.value + } + + /// Consumes `self` and returns the pair. + #[inline] + pub fn into_pair(self) -> (utils::PrecomputedHashObject, alias::PyObject) { + (self.key, self.value) + } + + #[inline] + pub fn expires_at(&self) -> std::time::SystemTime { + self.expires_at + } + + #[inline] + pub fn is_expired(&self, now: std::time::SystemTime) -> bool { + self.expires_at <= now + } + + /// Makes a clone of self.
+ /// + /// This creates another pointer to the same object, increasing its reference count. + #[inline] + pub fn clone_ref(&self, py: pyo3::Python<'_>) -> Self { + Self { + key: self.key.clone_ref(py), + value: self.value.clone_ref(py), + size: self.size, + expires_at: self.expires_at, + } + } +} + +impl HandleExt for ExpiringHandle { + type Key = utils::PrecomputedHashObject; + + #[inline(always)] + fn key(&self) -> &utils::PrecomputedHashObject { + &self.key + } + + #[inline(always)] + fn size(&self) -> usize { + self.size + } +} + +/// A view into an occupied entry in [`TTLPolicy`]. +pub struct Occupied<'a> { + /// The parent storage that owns the hash table. + policy: &'a mut TTLPolicy, + /// The shared configuration + shared: &'a Shared, + /// Raw bucket pointing to the occupied index. + bucket: hashbrown::raw::Bucket, +} + +impl traits::OccupiedExt for Occupied<'_> { + type Handle = ExpiringHandle; + type Shared = Shared; + + #[inline] + fn replace(self, new: Self::Handle) -> Self::Handle { + // In update we don't need to increment this; because this does not change the memory address ranges + // self.shared.generation_version().increment(); + + let item = unsafe { get_handle!(&mut self.policy, *self.bucket.as_ref()) }; + + self.policy.currsize = self + .policy + .currsize + .saturating_sub(item.size()) + .saturating_add(new.size()); + + std::mem::replace(item, new) + } + + #[inline] + fn remove(self) -> Self::Handle { + self.shared.generation_version().increment(); + + let (mut index, _) = unsafe { self.policy.table.remove(self.bucket) }; + index -= self.policy.front_offset; + + self.policy + .decrement_indexes(index + 1, self.policy.entries.len()); + + let handle = self.policy.entries.remove(index).unwrap(); + self.policy.currsize = self.policy.currsize.saturating_sub(handle.size()); + handle + } +} + +/// A view into a vacant slot in [`TTLPolicy`]. +pub struct Vacant<'a> { + /// The parent policy that owns the hash table. 
+ policy: &'a mut TTLPolicy, + /// The shared configuration + shared: &'a Shared, +} + +impl traits::VacantExt for Vacant<'_> { + type Handle = ExpiringHandle; + type Shared = Shared; + + #[inline] + fn would_exceed(&self, extra_size: usize) -> bool { + self.policy.currsize.saturating_add(extra_size) > self.shared.maxsize() + } + + #[inline] + fn evict(&mut self, py: pyo3::Python) -> pyo3::PyResult<()> { + self.policy.evict(py, self.shared)?; + Ok(()) + } + + fn insert(self, handle: Self::Handle) { + self.shared.generation_version().increment(); + + self.policy.currsize = self.policy.currsize.saturating_add(handle.size()); + + self.policy.table.insert( + handle.key().hash(), + self.policy.entries.len() + self.policy.front_offset, + |index| get_handle!(&self.policy, *index).key().hash(), + ); + self.policy.entries.push_back(handle); + } +} + +pub struct TTLPolicy { + // fields are same as FIFOPolicy + table: hashbrown::raw::RawTable, + entries: VecDeque, + currsize: usize, + front_offset: usize, +} + +impl TTLPolicy { + /// Creates a new [`TTLPolicy`]. + /// + /// The underlying [`VecDeque`] is pre-allocated to hold at least `capacity` entries + /// without reallocation. + pub fn new(capacity: usize) -> Self { + Self { + table: hashbrown::raw::RawTable::with_capacity(capacity), + entries: VecDeque::with_capacity(capacity), + currsize: 0, + front_offset: 0, + } + } + + #[inline] + pub fn table(&self) -> &hashbrown::raw::RawTable { + &self.table + } + + #[inline] + pub fn entries(&self) -> &VecDeque { + &self.entries + } + + #[inline] + fn decrement_indexes(&mut self, start: usize, end: usize) { + #[cfg(not(feature = "small-offset"))] + const MAX_FRONT_OFFSET: usize = usize::MAX - isize::MAX as usize; + + #[cfg(feature = "small-offset")] + const MAX_FRONT_OFFSET: usize = u8::MAX as usize; + + // Fast path: shifting the entire front is a single counter increment. + // Guard against overflow; the full-normalization path below handles that case. 
+ if start <= 1 && end == self.entries.len() && self.front_offset < MAX_FRONT_OFFSET { + self.front_offset += 1; + return; + } + + // Snapshot so the borrow checker doesn't complain about `self` inside the loops. + let fo = self.front_offset; + + if (end - start) > self.table.num_buckets() / 2 { + // Table-scan path: already O(n), so fold normalization in for free. + // One pass: normalize every index (subtract fo) and decrement those in [start, end). + unsafe { + for bucket in self.table.iter() { + let i = bucket.as_mut(); + let vd_idx = *i - fo; // raw VecDeque index + *i = if start <= vd_idx && vd_idx < end { + vd_idx - 1 // normalize + decrement + } else { + vd_idx // normalize only + }; + } + } + } else { + // Entries-scan path: O(range) decrement pass, then O(n) normalization pass. + // + // Pass 1: decrement the logical indices for entries in [start, end). + let shifted = self.entries.range(start..end); + for (i, entry) in (start..end).zip(shifted) { + let result = unsafe { + self.table + .get_mut(entry.key().hash(), |x| Ok::<_, pyo3::PyErr>((*x) - fo == i)) + .unwrap_unchecked() + }; + *result.expect("index not found") -= 1; + } + + // Pass 2: normalize every stored index by subtracting `fo`. + // • Entries in [start, end): (vd_idx + fo - 1) - fo = vd_idx - 1 + // • All others: (vd_idx + fo) - fo = vd_idx + if fo != 0 { + unsafe { + for bucket in self.table.iter() { + *bucket.as_mut() -= fo; + } + } + } + } + + // Both branches now store raw VecDeque indices, so the offset is zero. 
+ self.front_offset = 0; + } + + pub fn expire(&mut self, py: pyo3::Python<'_>, shared: &Shared) -> pyo3::PyResult<()> { + let now = std::time::SystemTime::now(); + + while let Some(handle) = self.entries.front() { + if !handle.is_expired(now) { + break; + } + + let eq = |index: &usize| get_handle!(&self, *index).key().py_eq(py, handle.key()); + if std::hint::unlikely(self.table.remove_entry(handle.key().hash(), eq)?.is_none()) { + unreachable!("popitem key not found in table"); + } + + shared.generation_version().increment(); + + // Rebase while the expired front is still in `entries`: `decrement_indexes` expects pre-removal positions. + self.decrement_indexes(1, self.entries.len()); + let front = unsafe { self.entries.pop_front().unwrap_unchecked() }; + self.currsize = self.currsize.saturating_sub(front.size()); + } + + Ok(()) + } + + #[inline] + pub fn iter( + &mut self, + py: pyo3::Python<'_>, + shared: &Shared, + ) -> pyo3::PyResult> { + self.expire(py, shared)?; + + let (first, second) = self.entries.as_slices(); + Ok(utils::RawVecDequeIter::new(first, second)) + } +} + +impl PolicyExt for TTLPolicy { + type Shared = Shared; + type Handle = ExpiringHandle; + + type Occupied<'a> + = Occupied<'a> + where + Self: 'a; + + type Vacant<'a> + = Vacant<'a> + where + Self: 'a; + + #[inline] + fn current_size(&self) -> usize { + self.currsize + } + + #[inline] + fn get( + &mut self, + py: pyo3::Python, + key: &::Key, + ) -> pyo3::PyResult> { + let eq = |index: &usize| get_handle!(&self, *index).key().py_eq(py, key); + + match self + .table + .get(key.hash(), eq)? + .map(|index| get_handle!(&self, *index)) + { + Some(handle) => { + if handle.is_expired(std::time::SystemTime::now()) { + Ok(None) + } else { + Ok(Some(handle)) + } + } + None => Ok(None), + } + } + + fn entry<'a>( + &'a mut self, + py: pyo3::Python, + key: &::Key, + shared: &'a Self::Shared, + ) -> pyo3::PyResult, Self::Vacant<'a>>> { + self.expire(py, shared)?; + + let eq = |index: &usize| get_handle!(&self, *index).key().py_eq(py, key); + match self.table.find(key.hash(), eq)?
{ + Some(bucket) => { + let result = Occupied { + policy: self, + shared, + bucket, + }; + Ok(traits::PolicyEntry::Occupied(result)) + } + None => { + let result = Vacant { + policy: self, + shared, + }; + Ok(traits::PolicyEntry::Vacant(result)) + } + } + } + + fn evict(&mut self, _py: pyo3::Python, shared: &Self::Shared) -> pyo3::PyResult { + let front = self.entries.pop_front(); + if front.is_none() { + return Err(new_py_error!(PyKeyError, "cache is empty")); + } + + let front = unsafe { front.unwrap_unchecked() }; + + let eq = |index: &usize| Ok::<_, pyo3::PyErr>((*index - self.front_offset) == 0); + if std::hint::unlikely(self.table.remove_entry(front.key().hash(), eq)?.is_none()) { + unreachable!("popitem key not found in table"); + } + + shared.generation_version().increment(); + + self.currsize = self.currsize.saturating_sub(front.size()); + self.decrement_indexes(1, self.entries.len()); + Ok(front) + } + + #[inline] + fn shrink_to_fit(&mut self, shared: &Self::Shared) { + shared.generation_version().increment(); + + self.table + .shrink_to(0, |index| get_handle!(&self, *index).key().hash()); + self.entries.shrink_to_fit(); + } + + #[inline] + fn clear(&mut self, shared: &Self::Shared) { + if self.entries.is_empty() { + return; + } + + shared.generation_version().increment(); + self.table.clear(); + self.entries.clear(); + self.currsize = 0; + self.front_offset = 0; + } + + fn py_eq( + &self, + py: pyo3::Python, + shared: &Self::Shared, + other: &Self, + other_shared: &Self::Shared, + ) -> pyo3::PyResult { + if shared.maxsize() != other_shared.maxsize() + || shared.global_ttl() != other_shared.global_ttl() + || self.table.len() != other.table.len() + { + return Ok(false); + } + + let mut error = None; + let result = unsafe { + let mut iterator = self.table.iter().map(|x| x.as_ref()); + + iterator.all(|index_1| { + let handle_1 = get_handle!(&self, *index_1); + + let result = other.table.get(handle_1.key().hash(), |index| { + handle_1.key().py_eq(py, 
get_handle!(&other, *index).key()) + }); + + match result { + Err(e) => { + error = Some(e); + // Return false to break the `.all` loop + false + } + Ok(None) => false, + Ok(Some(index_2)) => { + let handle_2 = get_handle!(&other, *index_2); + + let value_1 = handle_1.value(); + let value_2 = handle_2.value(); + + match utils::pyobject_equal(py, value_1.as_ptr(), value_2.as_ptr()) { + Ok(result) => result, + Err(e) => { + error = Some(e); + // Return false to break the `.all` loop + false + } + } + } + } + }) + }; + + if let Some(error) = error { + return Err(error); + } + Ok(result) + } + + fn clone_ref(&mut self, py: pyo3::Python<'_>) -> Self { + let mut entries = VecDeque::with_capacity(self.entries.len()); + for handle in self.entries.iter() { + entries.push_back(handle.clone_ref(py)); + } + + Self { + table: self.table.clone(), + entries, + currsize: self.currsize, + front_offset: self.front_offset, + } + } +} diff --git a/src/policies/wrapped.rs b/src/policies/wrapped.rs index dcee788..8966018 100644 --- a/src/policies/wrapped.rs +++ b/src/policies/wrapped.rs @@ -1,5 +1,4 @@ use crate::internal::alias; -use crate::policies::traits::EntryExt; use crate::policies::traits::HandleExt; use crate::policies::traits::OccupiedExt; use crate::policies::traits::PolicyEntry; @@ -58,26 +57,36 @@ fn insert_inner( py: pyo3::Python<'_>, handle: P::Handle, ) -> pyo3::PyResult> { - let entry = lock.entry(py, handle.key(), shared)?; - match entry { - PolicyEntry::Occupied(mut occupied) => { - // Evict if need - while occupied.would_exceed(handle.size()) { - occupied.evict(py)?; - } + let handle_size = handle.size(); - Ok(Some(occupied.replace(handle))) - } + if handle_size > shared.maxsize() { + return Err(new_py_error!( + PyOverflowError, + "handle size is more than the configured maximum size" + )); + } + + let result = match lock.entry(py, handle.key(), shared)? 
{ + PolicyEntry::Occupied(occupied) => Some(occupied.replace(handle)), PolicyEntry::Vacant(mut vacant) => { // Evict if need - while vacant.would_exceed(handle.size()) { + while vacant.would_exceed(handle_size) { vacant.evict(py)?; } vacant.insert(handle); - Ok(None) + None + } + }; + + if result.is_some() { + // For the `PolicyEntry::Occupied` case, evict after replacement + while lock.current_size() > shared.maxsize() { + lock.evict(py, shared)?; } } + + Ok(result) } // Duplicate methods across all policies diff --git a/src/pyclasses/cache.rs b/src/pyclasses/cache.rs index 3ea4096..d6135c7 100644 --- a/src/pyclasses/cache.rs +++ b/src/pyclasses/cache.rs @@ -264,11 +264,11 @@ impl PyCache { /// Returns: /// The value associated with the key, or the default value if the key is not found. #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] - fn get<'p>( + fn get( &self, py: pyo3::Python, key: alias::PyObject, - default: utils::OptionalArgument<'p>, + default: utils::OptionalArgument, ) -> pyo3::PyResult { let key = utils::PrecomputedHashObject::new(py, key)?; @@ -280,7 +280,7 @@ impl PyCache { } match default { - utils::OptionalArgument::Defined(x) => Ok(x.unbind()), + utils::OptionalArgument::Defined(x) => Ok(x), utils::OptionalArgument::Undefined => unsafe { // SAFETY: None is immortal, so reference counting has no meaning Ok(pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind()) @@ -332,7 +332,7 @@ impl PyCache { drop(policy); let default_object = match default { - utils::OptionalArgument::Defined(x) => x.unbind(), + utils::OptionalArgument::Defined(x) => x, utils::OptionalArgument::Undefined => unsafe { // SAFETY: None is immortal, so reference counting has no meaning pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind() @@ -369,7 +369,7 @@ impl PyCache { } match default { - utils::OptionalArgument::Defined(x) => Ok(x.unbind()), + utils::OptionalArgument::Defined(x) => Ok(x), utils::OptionalArgument::Undefined => 
Err(new_py_error!( PyKeyError, Into::::into(key) diff --git a/src/pyclasses/fifocache.rs b/src/pyclasses/fifocache.rs index 65b01f4..dc84788 100644 --- a/src/pyclasses/fifocache.rs +++ b/src/pyclasses/fifocache.rs @@ -275,7 +275,7 @@ impl PyFIFOCache { &self, py: pyo3::Python, key: alias::PyObject, - default: utils::OptionalArgument<'p>, + default: utils::OptionalArgument, ) -> pyo3::PyResult { let key = utils::PrecomputedHashObject::new(py, key)?; @@ -287,7 +287,7 @@ impl PyFIFOCache { } match default { - utils::OptionalArgument::Defined(x) => Ok(x.unbind()), + utils::OptionalArgument::Defined(x) => Ok(x), utils::OptionalArgument::Undefined => unsafe { // SAFETY: None is immortal, so reference counting has no meaning Ok(pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind()) @@ -339,7 +339,7 @@ impl PyFIFOCache { drop(policy); let default_object = match default { - utils::OptionalArgument::Defined(x) => x.unbind(), + utils::OptionalArgument::Defined(x) => x, utils::OptionalArgument::Undefined => unsafe { // SAFETY: None is immortal, so reference counting has no meaning pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind() @@ -376,7 +376,7 @@ impl PyFIFOCache { } match default { - utils::OptionalArgument::Defined(x) => Ok(x.unbind()), + utils::OptionalArgument::Defined(x) => Ok(x), utils::OptionalArgument::Undefined => Err(new_py_error!( PyKeyError, Into::::into(key) diff --git a/src/pyclasses/lfucache.rs b/src/pyclasses/lfucache.rs index a895114..59868e5 100644 --- a/src/pyclasses/lfucache.rs +++ b/src/pyclasses/lfucache.rs @@ -289,11 +289,11 @@ impl PyLFUCache { /// Returns: /// The value associated with the key, or the default value if the key is not found. 
#[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] - fn get<'p>( + fn get( &self, py: pyo3::Python, key: alias::PyObject, - default: utils::OptionalArgument<'p>, + default: utils::OptionalArgument, ) -> pyo3::PyResult { let key = utils::PrecomputedHashObject::new(py, key)?; @@ -305,7 +305,7 @@ impl PyLFUCache { } match default { - utils::OptionalArgument::Defined(x) => Ok(x.unbind()), + utils::OptionalArgument::Defined(x) => Ok(x), utils::OptionalArgument::Undefined => unsafe { // SAFETY: None is immortal, so reference counting has no meaning Ok(pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind()) @@ -357,7 +357,7 @@ impl PyLFUCache { drop(policy); let default_object = match default { - utils::OptionalArgument::Defined(x) => x.unbind(), + utils::OptionalArgument::Defined(x) => x, utils::OptionalArgument::Undefined => unsafe { // SAFETY: None is immortal, so reference counting has no meaning pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind() @@ -395,7 +395,7 @@ impl PyLFUCache { } match default { - utils::OptionalArgument::Defined(x) => Ok(x.unbind()), + utils::OptionalArgument::Defined(x) => Ok(x), utils::OptionalArgument::Undefined => Err(new_py_error!( PyKeyError, Into::::into(key) @@ -635,11 +635,11 @@ impl PyLFUCache { } #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] - fn peek<'p>( + fn peek( &self, py: pyo3::Python, key: alias::PyObject, - default: utils::OptionalArgument<'p>, + default: utils::OptionalArgument, ) -> pyo3::PyResult { let key = utils::PrecomputedHashObject::new(py, key)?; @@ -651,7 +651,7 @@ impl PyLFUCache { } match default { - utils::OptionalArgument::Defined(x) => Ok(x.unbind()), + utils::OptionalArgument::Defined(x) => Ok(x), utils::OptionalArgument::Undefined => unsafe { // SAFETY: None is immortal, so reference counting has no meaning Ok(pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind()) diff --git a/src/pyclasses/lrucache.rs b/src/pyclasses/lrucache.rs index 
a0b2ec0..cfcefad 100644 --- a/src/pyclasses/lrucache.rs +++ b/src/pyclasses/lrucache.rs @@ -298,11 +298,11 @@ impl PyLRUCache { /// Returns: /// The value associated with the key, or the default value if the key is not found. #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] - fn get<'p>( + fn get( &self, py: pyo3::Python, key: alias::PyObject, - default: utils::OptionalArgument<'p>, + default: utils::OptionalArgument, ) -> pyo3::PyResult { let key = utils::PrecomputedHashObject::new(py, key)?; @@ -314,7 +314,7 @@ impl PyLRUCache { } match default { - utils::OptionalArgument::Defined(x) => Ok(x.unbind()), + utils::OptionalArgument::Defined(x) => Ok(x), utils::OptionalArgument::Undefined => unsafe { // SAFETY: None is immortal, so reference counting has no meaning Ok(pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind()) @@ -366,7 +366,7 @@ impl PyLRUCache { drop(policy); let default_object = match default { - utils::OptionalArgument::Defined(x) => x.unbind(), + utils::OptionalArgument::Defined(x) => x, utils::OptionalArgument::Undefined => unsafe { // SAFETY: None is immortal, so reference counting has no meaning pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind() @@ -403,7 +403,7 @@ impl PyLRUCache { } match default { - utils::OptionalArgument::Defined(x) => Ok(x.unbind()), + utils::OptionalArgument::Defined(x) => Ok(x), utils::OptionalArgument::Undefined => Err(new_py_error!( PyKeyError, Into::::into(key) @@ -604,11 +604,11 @@ impl PyLRUCache { } #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] - fn peek<'p>( + fn peek( &self, py: pyo3::Python, key: alias::PyObject, - default: utils::OptionalArgument<'p>, + default: utils::OptionalArgument, ) -> pyo3::PyResult { let key = utils::PrecomputedHashObject::new(py, key)?; @@ -620,7 +620,7 @@ impl PyLRUCache { } match default { - utils::OptionalArgument::Defined(x) => Ok(x.unbind()), + utils::OptionalArgument::Defined(x) => Ok(x), 
utils::OptionalArgument::Undefined => unsafe { // SAFETY: None is immortal, so reference counting has no meaning Ok(pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind()) diff --git a/src/pyclasses/mod.rs b/src/pyclasses/mod.rs index fa1eeea..70785ed 100644 --- a/src/pyclasses/mod.rs +++ b/src/pyclasses/mod.rs @@ -4,3 +4,4 @@ pub mod fifocache; pub mod lfucache; pub mod lrucache; pub mod rrcache; +pub mod ttlcache; diff --git a/src/pyclasses/rrcache.rs b/src/pyclasses/rrcache.rs index e6149b2..30b4039 100644 --- a/src/pyclasses/rrcache.rs +++ b/src/pyclasses/rrcache.rs @@ -269,11 +269,11 @@ impl PyRRCache { /// Returns: /// The value associated with the key, or the default value if the key is not found. #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] - fn get<'p>( + fn get( &self, py: pyo3::Python, key: alias::PyObject, - default: utils::OptionalArgument<'p>, + default: utils::OptionalArgument, ) -> pyo3::PyResult { let key = utils::PrecomputedHashObject::new(py, key)?; @@ -285,7 +285,7 @@ impl PyRRCache { } match default { - utils::OptionalArgument::Defined(x) => Ok(x.unbind()), + utils::OptionalArgument::Defined(x) => Ok(x), utils::OptionalArgument::Undefined => unsafe { // SAFETY: None is immortal, so reference counting has no meaning Ok(pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind()) @@ -337,7 +337,7 @@ impl PyRRCache { drop(policy); let default_object = match default { - utils::OptionalArgument::Defined(x) => x.unbind(), + utils::OptionalArgument::Defined(x) => x, utils::OptionalArgument::Undefined => unsafe { // SAFETY: None is immortal, so reference counting has no meaning pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind() @@ -374,7 +374,7 @@ impl PyRRCache { } match default { - utils::OptionalArgument::Defined(x) => Ok(x.unbind()), + utils::OptionalArgument::Defined(x) => Ok(x), utils::OptionalArgument::Undefined => Err(new_py_error!( PyKeyError, Into::::into(key) diff --git 
a/src/pyclasses/ttlcache.rs b/src/pyclasses/ttlcache.rs new file mode 100644 index 0000000..1a9c869 --- /dev/null +++ b/src/pyclasses/ttlcache.rs @@ -0,0 +1,897 @@ +use crate::internal::alias; +use crate::internal::onceinit; +use crate::internal::utils; +use crate::policies::traits::HandleExt; +use crate::policies::traits::PolicyExt; +use crate::policies::traits::SharedExt; +use crate::policies::ttlpolicy; +use crate::policies::wrapped::Wrapped; + +implement_pyclass! { + /// A Time-To-Live (TTL) cache eviction policy: each entry carries an expiration timestamp + /// and is considered stale — and eligible for eviction — once that deadline has passed, + /// regardless of how recently or frequently it was accessed. + /// + /// ## How It Works + /// The TTL algorithm pairs time-based expiration with insertion-order eviction. Every entry + /// is stamped with an absolute `expires_at` timestamp at insertion time (computed as + /// `now + global_ttl`). Entries are stored in insertion order, and eviction proceeds from the + /// front of that queue — but only after confirming the candidate has actually expired. A live + /// entry at the front of the queue blocks eviction of everything behind it, so the cache may + /// temporarily exceed capacity if the oldest entries are still fresh. + /// + /// Like `FIFOPolicy`, this implementation backs the queue with a `double-ended queue` for O(1) + /// front removal and a `hash map` for O(1) key lookups. The same logical-index trick applies: + /// the table stores monotonically increasing counters rather than physical deque positions, and + /// a `front_offset` counter converts a logical index back to a physical one at read time via + /// `entries[table[key] - front_offset]`. This keeps eviction and lookup O(1) without rewriting + /// the table on every eviction. On top of that, every read checks `expires_at` against the current wall-clock time and + /// treats any expired entry as a cache miss. 
+ /// + /// Without `grace_time`, an expiry sweep is triggered automatically on every call to + /// `insert`, `update`, `current_size`, `remaining_size`, `last`, `first`, `items`, `keys`, + /// `values`, and `__iter__`. A completely idle cache will accumulate stale entries between + /// these calls, but any normal interaction with the cache is sufficient to reclaim them. + /// When `grace_time` is set, a background Rust thread performs the sweep on that interval + /// instead, reclaiming expired entries independent of any method calls. + /// + /// ### Pros + /// - Insert, lookup, and evict are all O(1) amortized: the `front_offset` trick eliminates the O(n) + /// index-shifting that a naïve implementation would require on every eviction. + /// - Entries expire automatically without any background thread or explicit invalidation call. + /// Stale data is never returned to the caller. + /// - TTL expiry and insertion-order eviction compose cleanly: the oldest entry is always evicted + /// first among those that have already expired. + /// - A single `global_ttl` keeps configuration simple; every entry ages at the same rate. + /// + /// ### Cons + /// + /// - Wall-clock dependency. Correctness relies on a monotonically advancing system clock. + /// Clock adjustments (NTP steps, suspend/resume) can cause entries to expire earlier or later + /// than intended. + /// - When `grace_time` is set, a background Rust thread wakes on that interval to sweep and + /// remove all expired entries. This adds a small amount of background CPU usage and + /// introduces a reaper thread for the lifetime of the cache. + /// - No per-entry TTL override. All entries share `global_ttl`; mixed expiry requirements need + /// a different policy or a wrapper layer. + /// - The rare O(n) index rebase (triggered when `front_offset` nears `usize::MAX - isize::MAX`) + /// introduces an occasional latency spike. Amortized cost is negligible, but worst-case + /// latency is unbounded in principle. 
+ /// + /// ## When to use it + /// Reach for `TTLPolicy` when: + /// - Cached data has a natural freshness window: API responses, auth tokens, DNS records, + /// rate-limit counters, or any value that becomes incorrect or unsafe after a known interval. + /// - You need automatic expiry without a background reaper thread — expiry sweeps on common + /// method calls are sufficient, or you want continuous reclamation via `grace_time`. + /// - Access patterns are unpredictable or uniform enough that recency- or frequency-based + /// eviction (LRU/LFU) would offer no meaningful advantage. + /// + /// Avoid it when: + /// - Your workload has strong temporal locality and you need a best-effort hit rate policy — + /// LRU will serve you better. + /// - Per-entry TTL granularity is required. If different keys need different lifetimes, + /// consider a policy that accepts per-insertion expiry hints. + /// - Your environment has an unreliable or adjustable system clock, where wall-clock-based + /// expiry may behave unexpectedly. + [subclass, extends=crate::pyclasses::base::PyBaseCacheImpl, generic, frozen] + PyTTLCache as "TTLCache" (onceinit::OnceInit>); +} + +#[pyo3::pymethods] +impl PyTTLCache { + #[new] + #[allow(unused_variables)] + #[pyo3(signature=(*args, **kwds))] + fn __new__( + args: alias::ArgsType, + kwds: Option, + ) -> (Self, crate::pyclasses::base::PyBaseCacheImpl) { + ( + Self(onceinit::OnceInit::uninit()), + crate::pyclasses::base::PyBaseCacheImpl, + ) + } + + /// Initialize a new `PyTTLCache` instance. + /// + /// Args: + /// maxsize: Maximum number of elements the cache can hold. + /// global_ttl: Time-to-live for cache entries, either as seconds or a timedelta. + /// iterable: Initial data to populate the cache. + /// capacity: Pre-allocate capacity to minimize reallocations. Defaults to 0. + /// getsizeof: A callable that computes the size of a key-value pair. When `None`, each + /// entry is assumed to have a size of 1 (equivalent to `lambda k, v: 1`). 
+ /// Use this to implement weighted caching — for example, sizing entries by + /// memory footprint or byte length. + /// + /// The cache can be pre-sized via `capacity` to reduce hash table reallocations when + /// the number of expected entries is known ahead of time. + #[pyo3(signature=(maxsize, global_ttl, iterable=None, *, capacity=0, getsizeof=None))] + fn __init__( + &self, + py: pyo3::Python, + maxsize: usize, + global_ttl: utils::FloatOrTimedelta, + iterable: Option, + capacity: usize, + getsizeof: Option, + ) -> pyo3::PyResult<()> { + // TODO: support sweep_interval + + let global_ttl: f64 = global_ttl.into(); + if global_ttl <= 0.0 { + return Err(new_py_error!( + PyValueError, + "global_ttl must be positive and non-zero" + )); + } + + let wrapped = Wrapped::new(ttlpolicy::TTLPolicy::new(capacity), unsafe { + ttlpolicy::Shared::with_ttl(maxsize, getsizeof, Some(global_ttl)) + }); + + if let Some(iterable) = iterable { + let ttl: ttlpolicy::ExpiresAt = wrapped.shared().global_ttl().unwrap().into(); + let getsizeof = wrapped.shared().getsizeof().clone_ref(py); + + let result = wrapped.extend( + // iterable object + iterable, + // transform function + |key, value| ttlpolicy::ExpiringHandle::new(py, &getsizeof, ttl, key, value), + ); + self.0.set(wrapped); + result + } else { + self.0.set(wrapped); + Ok(()) + } + } + + #[getter] + #[inline] + fn maxsize(&self) -> usize { + let inner = self.0.get(); + inner.shared().maxsize() + } + + #[inline] + fn current_size(&self, py: pyo3::Python) -> pyo3::PyResult { + let inner = self.0.get(); + let mut policy = inner.policy(); + policy.expire(py, inner.shared())?; + Ok(policy.current_size()) + } + + #[inline] + fn remaining_size(&self, py: pyo3::Python) -> pyo3::PyResult { + let inner = self.0.get(); + { + let mut policy = inner.policy(); + policy.expire(py, inner.shared())?; + } + + Ok(inner.remaining_size()) + } + + #[getter] + #[inline] + fn getsizeof(&self, py: pyo3::Python) -> Option { + let inner = self.0.get(); 
+ inner.shared().getsizeof().clone_ref(py).into() + } + + #[getter] + #[inline] + fn global_ttl(&self) -> f64 { + let inner = self.0.get(); + unsafe { inner.shared().global_ttl().unwrap_unchecked().as_secs_f64() } + } + + /// Returns the number of elements the map can hold without reallocating. + #[inline] + fn capacity(&self) -> usize { + let inner = self.0.get(); + let policy = inner.policy(); + + policy.table().capacity().min(policy.entries().capacity()) + } + + /// Returns the number of entries currently in the cache. + #[inline] + fn __len__(&self) -> usize { + let inner = self.0.get(); + let policy = inner.policy(); + + debug_assert!(policy.table().len() == policy.entries().len()); + policy.table().len() + } + + #[inline] + fn __sizeof__(&self) -> usize { + let inner = self.0.get(); + let policy = inner.policy(); + + let table_cap = policy.table().capacity() * std::mem::size_of::(); + let vecdeque_cap = + policy.entries().capacity() * std::mem::size_of::(); + + table_cap + vecdeque_cap + } + + #[inline] + fn __bool__(&self) -> bool { + let inner = self.0.get(); + let policy = inner.policy(); + + !policy.table().is_empty() + } + + #[inline] + fn __contains__(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult { + self.contains(py, key) + } + + /// Returns `true` if the cache contains an entry for `key`. + #[inline] + fn contains(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + let inner = self.0.get(); + inner.contains(py, &key) + } + + /// Returns `True` if cache is empty. + #[inline] + fn is_empty(&self) -> bool { + let inner = self.0.get(); + let policy = inner.policy(); + + policy.table().is_empty() + } + + /// Returns `True` when the cumulative size has reached the maxsize limit. 
+ #[inline] + fn is_full(&self) -> bool { + let inner = self.0.get(); + let shared = inner.shared(); + let policy = inner.policy(); + + policy.current_size() >= shared.maxsize() + } + + /// Equals to `self[key] = value`, but returns a value: + /// + /// - If the cache did not have this key present, None is returned. + /// - If the cache did have this key present, the value is updated, + /// and the old value is returned. The key is not updated, though. + fn insert( + &self, + py: pyo3::Python, + key: alias::PyObject, + value: alias::PyObject, + ) -> pyo3::PyResult> { + let inner = self.0.get(); + let shared = inner.shared(); + let handle = ttlpolicy::ExpiringHandle::new( + py, + shared.getsizeof(), + unsafe { shared.global_ttl().unwrap_unchecked().into() }, + key, + value, + )?; + + let old_handle = inner.insert(py, handle)?.map(|x| x.into_value()); + Ok(old_handle) + } + + /// Updates the cache with elements from a dictionary or an iterable object of key/value pairs. + fn update( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + iterable: alias::PyObject, + ) -> pyo3::PyResult<()> { + if std::ptr::eq(slf.as_ptr(), iterable.as_ptr()) { + return Ok(()); + } + + let inner = slf.0.get(); + let shared = inner.shared(); + + let ttl: ttlpolicy::ExpiresAt = unsafe { shared.global_ttl().unwrap_unchecked().into() }; + let getsizeof = shared.getsizeof().clone_ref(py); + + inner.extend( + // iterable object + iterable.into_bound(py), + // transform function + move |key, value| ttlpolicy::ExpiringHandle::new(py, &getsizeof, ttl, key, value), + ) + } + + #[inline] + fn __setitem__( + &self, + py: pyo3::Python, + key: alias::PyObject, + value: alias::PyObject, + ) -> pyo3::PyResult<()> { + self.insert(py, key, value)?; + Ok(()) + } + + /// Retrieves the value for a given key from the cache. + /// + /// Returns the value associated with the key if present, otherwise returns the specified default value. 
+ /// Equivalent to `self[key]`, but provides a fallback default if the key is not found. + /// + /// Args: + /// key: The key to look up in the cache. + /// default: The value to return if the key is not present in the cache. Defaults to None. + /// + /// Returns: + /// The value associated with the key, or the default value if the key is not found. + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn get( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let mut policy = inner.policy(); + + if let Some(x) = policy.get(py, &key)? { + return Ok(x.value().clone_ref(py)); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok(x), + utils::OptionalArgument::Undefined => unsafe { + // SAFETY: None is immortal, so reference counting has no meaning + Ok(pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind()) + }, + } + } + + fn __getitem__( + &self, + py: pyo3::Python, + key: alias::PyObject, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let mut policy = inner.policy(); + + match policy.get(py, &key)? { + Some(x) => Ok(x.value().clone_ref(py)), + None => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + /// Inserts key with a value of default if key is not in the cache. + /// + /// Returns the value for key if key is in the cache, else default. + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn setdefault( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult { + // 1. Try to get value + // 2. If exists -> return it + // 3. 
Else -> insert default -> return default + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let shared = inner.shared(); + let mut policy = inner.policy(); + + if let Some(x) = policy.get(py, &key)? { + return Ok(x.value().clone_ref(py)); + } + drop(policy); + + let default_object = match default { + utils::OptionalArgument::Defined(x) => x, + utils::OptionalArgument::Undefined => unsafe { + // SAFETY: None is immortal, so reference counting has no meaning + pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind() + }, + }; + + let handle = ttlpolicy::ExpiringHandle::with_precomputed_hash_key( + py, + shared.getsizeof(), + unsafe { shared.global_ttl().unwrap_unchecked().into() }, + key, + default_object.clone_ref(py), + )?; + + inner.insert(py, handle)?; + Ok(default_object) + } + + /// Removes specified key and returns the corresponding value. + /// + /// If the key is not found, returns the `default` if given; otherwise, raise a KeyError. + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn pop( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + + if let Some(x) = inner.remove(py, &key)? { + return Ok(x.into_value()); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok(x), + utils::OptionalArgument::Undefined => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + fn __delitem__(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult<()> { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + match inner.remove(py, &key)? { + Some(_) => Ok(()), + None => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + /// Remove and return a (key, value) pair as a 2-tuple. 
+ fn popitem(&self, py: pyo3::Python) -> pyo3::PyResult<(alias::PyObject, alias::PyObject)> { + let inner = self.0.get(); + let mut policy = inner.policy(); + + let handle = policy.evict(py, inner.shared())?; + drop(policy); + + let (key, val) = handle.into_pair(); + Ok((key.into(), val)) + } + + /// Calls the `popitem()` `n` times and returns count of removed items. + #[inline] + fn drain( + &self, + py: pyo3::Python, + n: pyo3::ffi::Py_ssize_t, + ) -> pyo3::PyResult { + let inner = self.0.get(); + inner.drain(py, n) + } + + /// Shrinks the internal allocation as close to the current length as possible. + #[inline] + fn shrink_to_fit(&self) { + let inner = self.0.get(); + let mut policy = inner.policy(); + policy.shrink_to_fit(inner.shared()); + } + + /// Removes all entries from the table and resets the cumulative size to zero. + #[pyo3(signature=(*, reuse=false))] + fn clear(&self, reuse: bool) { + let inner = self.0.get(); + let shared = inner.shared(); + let mut policy = inner.policy(); + + policy.clear(shared); + + if !reuse { + policy.shrink_to_fit(shared); + } + } + + fn __eq__( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + other: pyo3::PyRef<'_, Self>, + ) -> pyo3::PyResult { + if std::ptr::eq(slf.as_ptr(), other.as_ptr()) { + return Ok(true); + } + + let self_inner = slf.0.get(); + let other_inner = other.0.get(); + + let self_policy = self_inner.policy(); + let other_policy = other_inner.policy(); + + self_policy.py_eq( + py, + self_inner.shared(), + &*other_policy, + other_inner.shared(), + ) + } + + fn __ne__( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + other: pyo3::PyRef<'_, Self>, + ) -> pyo3::PyResult { + if std::ptr::eq(slf.as_ptr(), other.as_ptr()) { + return Ok(false); + } + + let self_inner = slf.0.get(); + let other_inner = other.0.get(); + + let self_policy = self_inner.policy(); + let other_policy = other_inner.policy(); + + self_policy + .py_eq( + py, + self_inner.shared(), + &*other_policy, + other_inner.shared(), + ) + 
.map(|x| !x) + } + + fn items(&self, py: pyo3::Python) -> pyo3::PyResult> { + let inner = self.0.get(); + + let iter = inner.policy().iter(py, inner.shared())?; + + let gv = inner.shared().generation_version().clone(); + let initial_gv = gv.get(); + + // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] + let result = PyTTLCacheItems { + iter: parking_lot::Mutex::new(iter), + gv, + initial_gv, + }; + pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + } + + fn values(&self, py: pyo3::Python) -> pyo3::PyResult> { + let inner = self.0.get(); + + let iter = inner.policy().iter(py, inner.shared())?; + + let gv = inner.shared().generation_version().clone(); + let initial_gv = gv.get(); + + // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] + let result = PyTTLCacheValues { + iter: parking_lot::Mutex::new(iter), + gv, + initial_gv, + }; + pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + } + + fn keys(&self, py: pyo3::Python) -> pyo3::PyResult> { + let inner = self.0.get(); + + let iter = inner.policy().iter(py, inner.shared())?; + + let gv = inner.shared().generation_version().clone(); + let initial_gv = gv.get(); + + // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] + let result = PyTTLCacheKeys { + iter: parking_lot::Mutex::new(iter), + gv, + initial_gv, + }; + pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + } + + #[inline] + fn __iter__(&self, py: pyo3::Python) -> pyo3::PyResult> { + self.keys(py) + } + + fn copy(&self, py: pyo3::Python) -> pyo3::PyResult> { + let inner = self.0.get(); + let cloned = inner.clone_ref(py); + let result = Self(onceinit::OnceInit::new(cloned)); + + pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseCacheImpl)) + } + + #[inline] + fn __copy__(&self, py: pyo3::Python) -> pyo3::PyResult> { + self.copy(py) + } + + fn __repr__(slf: 
pyo3::PyRef<'_, Self>, py: pyo3::Python) -> String { + let inner = slf.0.get(); + let shared = inner.shared(); + let policy = inner.policy(); + + let now = std::time::SystemTime::now(); + let iter = policy + .entries() + .iter() + .filter(|handle| !handle.is_expired(now)) + .map(|handle| { + ( + // Without using `.bind` it returns something like `Py(addr)` + handle.key().as_ref().bind(py), + handle.value().bind(py), + ) + }); + + let items = utils::items_to_str(iter, policy.table().len()).unwrap(); + format!( + "{}[{}/{}]({})", + unsafe { utils::get_type_name(py, slf.as_ptr()) }, + policy.current_size(), + shared.maxsize(), + items + ) + } + + #[inline] + #[pyo3(signature=(*, reuse=false))] + fn expire(&self, py: pyo3::Python, reuse: bool) -> pyo3::PyResult<()> { + let inner = self.0.get(); + let shared = inner.shared(); + let mut policy = inner.policy(); + + policy.expire(py, shared)?; + + if !reuse { + policy.shrink_to_fit(shared); + } + Ok(()) + } + + #[pyo3(signature = (n=0))] + fn first( + &self, + py: pyo3::Python, + mut n: pyo3::ffi::Py_ssize_t, + ) -> pyo3::PyResult { + let inner = self.0.get(); + let mut policy = inner.policy(); + + policy.expire(py, inner.shared())?; + + if n < 0 { + n += policy.entries().len() as isize; + } + if n < 0 { + return Err(new_py_error!(PyIndexError, "`n` out of range")); + } + + match policy.entries().get(n as usize) { + Some(handle) => Ok(handle.key().as_ref().clone_ref(py)), + None => Err(new_py_error!(PyIndexError, "`n` out of range")), + } + } + + fn last(&self, py: pyo3::Python) -> pyo3::PyResult { + let inner = self.0.get(); + let mut policy = inner.policy(); + + policy.expire(py, inner.shared())?; + + match policy.entries().back() { + Some(handle) => Ok(handle.key().as_ref().clone_ref(py)), + None => Err(new_py_error!(PyIndexError, "`n` out of range")), + } + } + + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn get_with_expire( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: 
utils::OptionalArgument, + ) -> pyo3::PyResult<(alias::PyObject, f64)> { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let mut policy = inner.policy(); + + if let Some(x) = policy.get(py, &key)? { + let dur = x + .expires_at() + .duration_since(std::time::SystemTime::now()) + .unwrap_or_default(); + + return Ok((x.value().clone_ref(py), dur.as_secs_f64())); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok((x, 0.0)), + utils::OptionalArgument::Undefined => unsafe { + // SAFETY: None is immortal, so reference counting has no meaning + Ok(( + pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind(), + 0.0, + )) + }, + } + } + + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn pop_with_expire( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult<(alias::PyObject, f64)> { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + + if let Some(x) = inner.remove(py, &key)? 
{ + let dur = x + .expires_at() + .duration_since(std::time::SystemTime::now()) + .unwrap_or_default(); + + return Ok((x.into_value(), dur.as_secs_f64())); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok((x, 0.0)), + utils::OptionalArgument::Undefined => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + fn popitem_with_expire( + &self, + py: pyo3::Python, + ) -> pyo3::PyResult<(alias::PyObject, alias::PyObject, f64)> { + let inner = self.0.get(); + let mut policy = inner.policy(); + + let handle = policy.evict(py, inner.shared())?; + drop(policy); + + let dur = handle + .expires_at() + .duration_since(std::time::SystemTime::now()) + .unwrap_or_default(); + + let (key, val) = handle.into_pair(); + Ok((key.into(), val, dur.as_secs_f64())) + } + + fn items_with_expire( + &self, + py: pyo3::Python, + ) -> pyo3::PyResult> { + let inner = self.0.get(); + + let iter = inner.policy().iter(py, inner.shared())?; + + let gv = inner.shared().generation_version().clone(); + let initial_gv = gv.get(); + + // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] + let result = PyTTLCacheItemsWithExpire { + iter: parking_lot::Mutex::new(iter), + gv, + initial_gv, + }; + pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + } + + fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { + let inner = self.0.get(); + let policy = inner.policy(); + + for handle in policy.entries().iter() { + visit.call(handle.key().as_ref())?; + visit.call(handle.value())?; + } + Ok(()) + } + + fn __clear__(&self) { + let inner = self.0.get(); + let mut policy = inner.policy(); + policy.clear(inner.shared()); + } +} + +// Implement iterators +macro_rules! implement_iterator { + ( + $( + $name:ident as $pyname:literal + fn ($py:ident, $handle:ident) -> $rt_type:ty { $init:expr } + )+ + ) => { + $( + implement_pyclass! 
{ + [extends=crate::pyclasses::base::PyBaseIteratorImpl, generic, frozen] + $name as $pyname { + initial_gv: u32, + gv: utils::GenerationVersion, + iter: parking_lot::Mutex>, + } + } + + #[pyo3::pymethods] + impl $name { + #[inline] + fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { + slf + } + + fn __next__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult<$rt_type> { + if slf.initial_gv != slf.gv.get() { + return Err(new_py_error!( + PyRuntimeError, + "cache size changed during iteration" + )); + } + + let now = std::time::SystemTime::now(); + let mut iter = slf.iter.lock(); + let $py = slf.py(); + + while let Some(x) = iter.next() { + let $handle = unsafe { x.as_ref() }; + if $handle.is_expired(now) { + continue; + } + + return Ok($init); + } + + Err(new_py_error!(PyStopIteration, ())) + } + } + )+ + }; +} +implement_iterator!( + PyTTLCacheItems as "ttlcache_items" + fn(py, handle) -> (alias::PyObject, alias::PyObject) {{ + let (key, val) = handle.clone_ref(py).into_pair(); + (key.into(), val) + }} + + PyTTLCacheItemsWithExpire as "ttlcache_items_with_expire" + fn(py, handle) -> (alias::PyObject, alias::PyObject, f64) {{ + let dur = handle + .expires_at() + .duration_since(std::time::SystemTime::now()) + .unwrap_or_default(); + + let (key, val) = handle.clone_ref(py).into_pair(); + (key.into(), val, dur.as_secs_f64()) + }} + + PyTTLCacheKeys as "ttlcache_keys" + fn(py, handle) -> alias::PyObject { handle.key().clone_ref(py).into() } + + PyTTLCacheValues as "ttlcache_values" + fn(py, handle) -> alias::PyObject { handle.value().clone_ref(py) } +); diff --git a/tests/mixins.py b/tests/mixins.py index 1f7a2eb..7b7ecb5 100644 --- a/tests/mixins.py +++ b/tests/mixins.py @@ -434,6 +434,18 @@ def test_copy_preserves_maxsize(self): assert c2.maxsize == cache.maxsize +@dataclasses.dataclass +class Sized: + size: int + key: typing.Any + + def __hash__(self) -> int: + return hash(self.key) + + def __eq__(self, other: object) -> bool: + return isinstance(other, 
Sized) and self.key == other.key + + class GetSizeOfMixin(BaseMixin): def test_current_size_uses_getsizeof(self): # Each value is a list; size = len(value) @@ -456,6 +468,65 @@ def test_overflow_based_on_weighted_size(self): with pytest.raises(OverflowError): c.insert("c", 1) # would push to 6 + def test_getsizeof_invalid_handle_size(self): + c = self.create_cache(maxsize=5, getsizeof=lambda x, _: len(x)) + + with pytest.raises(OverflowError): + c["more than 5"] = 1 + + with pytest.raises(OverflowError): + c.update({"more than 5": 1}) + + with pytest.raises(OverflowError): + c.update({"5": 1, "more than 5": 2}) + + assert "5" in c + + def test_getsizeof_insert_enforced(self): + c = self.create_cache(maxsize=100, getsizeof=lambda x, v: x.size + v.size) + + k1 = Sized(10, 1) + v1 = Sized(80, 101) + c[k1] = v1 + + k2 = Sized(10, 2) + v2 = Sized(80, 102) + + if isinstance(c, cachebox.Cache): + with pytest.raises(OverflowError): + c[k2] = v2 + + assert k1 in c + + else: + c[k2] = v2 + assert k1 not in c + assert k2 in c + assert c.current_size() <= c.maxsize + + def test_getsizeof_insert_existing_key_enforced(self): + c = self.create_cache(maxsize=100, getsizeof=lambda x, _: x.size) + + a_size_10 = Sized(10, "A") + a_size_100 = Sized(100, "A") + + b_size_10 = Sized(10, "B") + + c[a_size_10] = 1 + c[b_size_10] = 2 + + # A(10) -> currsize=10 + # B(10) -> currsize=20 + # + # A(100) -> currsize=110 - exceeded maxsize, should call evict + if isinstance(c, cachebox.Cache): + with pytest.raises(OverflowError): + c[a_size_100] = "new" + + return + + c[a_size_100] = "new" + class EdgeCasesMixin(BaseMixin): def test_integer_keys(self): diff --git a/tests/test_impls.py b/tests/test_impls.py index 320a005..616f9a8 100644 --- a/tests/test_impls.py +++ b/tests/test_impls.py @@ -1,4 +1,6 @@ +import time import typing +from datetime import timedelta import pytest @@ -100,7 +102,7 @@ def create_cache( iterable: typing.Any = None, capacity: int = 0, getsizeof: typing.Any = None, - ) -> 
cachebox.BaseCacheImpl: + ) -> cachebox.FIFOCache: return cachebox.FIFOCache( maxsize, iterable, @@ -889,3 +891,412 @@ def test_insert_returns_old_value_for_existing_key(self): old = c.insert("x", 99) assert old == 1 assert c["x"] == 99 + + +class TestTTLCache( + mixins.InitializeMixin, + mixins.InsertAndGetMixin, + mixins.PopitemMixin, + mixins.SetDefaultMixin, + mixins.PopAndDeleteMixin, + mixins.UpdateMixin, + mixins.IntrospectionMixin, + mixins.IterationMixin, + mixins.DrainClearShrinkMixin, + mixins.CopyMixin, + mixins.GetSizeOfMixin, + mixins.EdgeCasesMixin, + mixins.IssuesMixin, + mixins.FuzzyMixin, +): + def create_cache( + self, + maxsize: int = 10, + iterable: typing.Any = None, + capacity: int = 0, + getsizeof: typing.Any = None, + ) -> cachebox.TTLCache: + return cachebox.TTLCache( + maxsize, + 100, + iterable, + capacity=capacity, + getsizeof=getsizeof, + ) + + +class TestTTLCachePolicy: + def create_cache( + self, + maxsize: int = 10, + ttl: float | timedelta = 10, + iterable: typing.Any = None, + ) -> cachebox.TTLCache: + return cachebox.TTLCache(maxsize, ttl, iterable) + + def test_global_ttl_property(self): + c = self.create_cache(10, 5) + assert c.global_ttl == 5 + + c = self.create_cache(10, timedelta(seconds=5)) + assert c.global_ttl == 5 + + with pytest.raises(ValueError): + c = self.create_cache(10, 0) + + with pytest.raises(ValueError): + c = self.create_cache(10, -1) + + def test_global_ttl_with_iterable(self): + c = self.create_cache(10, 1, {"A": "B", "C": "D"}) + assert c.global_ttl == 1 + + assert "A" in c + assert "C" in c + + time.sleep(1) + + assert "A" not in c + assert "C" not in c + + # __len__ does not call expire + assert len(c) == 2 + + # current_size calls expire + assert c.current_size() == 0 + assert len(c) == 0 + + def test_oldest_item_evicted_on_overflow(self): + """When capacity is exceeded, the first inserted key must be evicted.""" + cache = self.create_cache(3, 10, [(1, "a"), (2, "b"), (3, "c")]) + cache[4] = "d" # 
triggers eviction of key 1 + assert 1 not in cache + assert 4 in cache + + def test_eviction_is_strictly_insertion_ordered(self): + """Keys evict in the exact order they were inserted, not access order.""" + cache = self.create_cache(3, 10, [(1, "a"), (2, "b"), (3, "c")]) + + cache[4] = "d" # evicts 1 + cache[5] = "e" # evicts 2 + cache[6] = "f" # evicts 3 + + assert 1 not in cache + assert 2 not in cache + assert 3 not in cache + assert {4, 5, 6} == set(cache.keys()) + + def test_accessing_key_does_not_reset_eviction_priority(self): + """ + Unlike LRU, a cache hit must NOT push the key to the back. + Key 1 is accessed repeatedly but must still be the first evicted. + """ + cache = self.create_cache(3, 10, [(1, "a"), (2, "b"), (3, "c")]) + + _ = cache[1] + _ = cache[1] + _ = cache[1] + + cache[4] = "d" # must still evict key 1 + assert 1 not in cache + + def test_overwriting_existing_key_does_not_change_eviction_order(self): + """ + Updating the value of an existing key must NOT change its insertion + position in the eviction queue. 
+ """ + cache = self.create_cache(3, 10, [(1, "a"), (2, "b"), (3, "c")]) + + cache[1] = "updated" # update, not a new insertion + cache[4] = "d" # must still evict key 1 + + assert 1 not in cache + assert cache[4] == "d" + + def test_popitem_removes_oldest(self): + """popitem() must always remove and return the oldest inserted entry.""" + cache = self.create_cache(3, 10, [(10, "x"), (20, "y"), (30, "z")]) + key, value = cache.popitem() + assert key == 10 + assert value == "x" + + def test_popitem_successive_calls_follow_fifo(self): + """Successive popitem() calls must yield keys in insertion order.""" + insertion_order = [(1, "a"), (2, "b"), (3, "c"), (4, "d")] + cache = self.create_cache(4, 10, insertion_order) + popped_keys = [cache.popitem()[0] for _ in range(4)] + assert popped_keys == [1, 2, 3, 4] + + def test_drain_removes_n_oldest(self): + """drain(n) must remove exactly n items, oldest-first.""" + cache = self.create_cache(5, 10, [(i, str(i)) for i in range(1, 6)]) + removed = cache.drain(3) + assert removed == 3 + assert 1 not in cache + assert 2 not in cache + assert 3 not in cache + assert 4 in cache + assert 5 in cache + + def test_first_returns_oldest_key(self): + cache = self.create_cache(3, 10, [(7, "a"), (8, "b"), (9, "c")]) + assert cache.first() == 7 + + def test_last_returns_newest_key(self): + cache = self.create_cache(3, 10, [(7, "a"), (8, "b"), (9, "c")]) + assert cache.last() == 9 + + def test_first_with_positive_n_browses_in_insertion_order(self): + """first(n) must walk forward through insertion order.""" + cache = self.create_cache(4, 10, [(10, "a"), (20, "b"), (30, "c"), (40, "d")]) + assert cache.first(0) == 10 + assert cache.first(1) == 20 + assert cache.first(2) == 30 + assert cache.first(3) == 40 + + def test_first_with_negative_n_browses_from_end(self): + """first(-1) is an alias for last(); first(-2) is the second newest.""" + cache = self.create_cache(4, 10, [(10, "a"), (20, "b"), (30, "c"), (40, "d")]) + assert cache.first(-1) == 
40 + assert cache.first(-2) == 30 + + def test_first_after_eviction_reflects_new_head(self): + """After an eviction, first() must return the new oldest key.""" + cache = self.create_cache(3, 10, [(1, "a"), (2, "b"), (3, "c")]) + cache[4] = "d" # evicts key 1 + assert cache.first() == 2 + + def test_last_after_insertion_reflects_new_tail(self): + cache = self.create_cache(3, 10, [(1, "a"), (2, "b"), (3, "c")]) + cache[4] = "d" + assert cache.last() == 4 + + def test_first_on_single_element_cache(self): + cache = self.create_cache(1, 10, [(42, "only")]) + assert cache.first() == 42 + assert cache.last() == 42 + + def test_first_raise_indexerror_on_empty_cache(self): + cache = self.create_cache(0) + + with pytest.raises(IndexError): + cache.first() + + def test_rolling_window_maintains_correct_contents(self): + """ + Simulate a sliding-window workload: insert N items into a cache of + size K and verify that only the most-recently inserted K items survive. + """ + maxsize = 4 + total = 20 + cache = self.create_cache(maxsize) + + for i in range(total): + cache[i] = i * 10 + + expected = set(range(total - maxsize, total)) + assert set(cache.keys()) == expected + + def test_no_phantom_keys_after_eviction(self): + """Evicted keys must not linger in contains() or iteration.""" + cache = self.create_cache(2, 10, [(1, "a"), (2, "b")]) + cache[3] = "c" # evicts 1 + + for key in cache: + assert key != 1 + + assert not cache.contains(1) + + def test_reinsert_evicted_key_rejoins_at_tail(self): + """ + Re-inserting a previously evicted key must treat it as a brand-new + entry positioned at the back of the queue. 
+ """ + cache = self.create_cache(3, 10, [(1, "a"), (2, "b"), (3, "c")]) + cache[4] = "d" # evicts 1 + cache[1] = "re" # re-insert 1 — should now be at the tail + cache[5] = "e" # must evict 2 (now the oldest), not 1 + + assert 2 not in cache + assert 1 in cache + assert cache[1] == "re" + + def test_is_full_triggers_at_maxsize(self): + cache = self.create_cache(3, 10, [(1, "a"), (2, "b"), (3, "c")]) + assert cache.is_full() + cache[4] = "d" # eviction should keep it full, not overflow + assert cache.is_full() + assert len(cache) == 3 + + def test_len_never_exceeds_maxsize(self): + cache = self.create_cache(5) + for i in range(100): + cache[i] = i + + assert len(cache) <= 5 + + def test_clear_resets_fifo_order(self): + """After clear(), the insertion order restarts from scratch.""" + cache = self.create_cache(3, 10, [(1, "a"), (2, "b"), (3, "c")]) + cache.clear() + cache[10] = "x" + cache[20] = "y" + cache[30] = "z" + assert cache.first() == 10 + assert cache.last() == 30 + + @pytest.mark.skipif( + not hasattr(cachebox, "_fifocache_small_offset"), + reason="requires small-offset feature flag", + ) + def test_edge_case_of_front_offset_overflow(self): + """ + Verifies that FIFOCache correctly rebases its internal `front_offset` + counter when it approaches `u8::MAX` (255 in the small-offset test build). 
+ """ + U8_MAX = 255 + CACHE_SIZE = 10 + + cache = self.create_cache(CACHE_SIZE) + + # drive front_offset to the rebase boundary + total_insertions = U8_MAX + CACHE_SIZE # 265 + for i in range(total_insertions): + cache.insert(i, i * 10) + + # Snapshot what *should* be alive: the last CACHE_SIZE keys inserted + expected_keys = set(range(total_insertions - CACHE_SIZE, total_insertions)) + + # verify the cache is structurally sound after the rebase + assert len(cache) == CACHE_SIZE + assert cache.is_full() + + # Exact contents — no phantom or missing keys + assert set(cache.keys()) == expected_keys + + # FIFO ordering must be intact + assert cache.first() == min(expected_keys) + assert cache.last() == max(expected_keys) + + # All surviving values are correct + for key in expected_keys: + assert cache[key] == key * 10 + + # All evicted keys are truly gone + for evicted in range(total_insertions - CACHE_SIZE): + assert evicted not in cache + + # Prove the cache keeps working normally after the rebase + + # New insertions must evict the oldest surviving key (min of expected_keys) + next_key = total_insertions # 265 + oldest_before = cache.first() + cache.insert(next_key, next_key * 10) + + assert oldest_before not in cache # oldest was evicted + assert cache[next_key] == next_key * 10 # new entry is present + assert cache.last() == next_key # sits at the tail + assert len(cache) == CACHE_SIZE # size is unchanged + + # Ordering of the remainder is still correct + assert cache.first() == min(expected_keys) + 1 + + # popitem() must still yield the oldest entry + oldest_key, oldest_val = cache.popitem() + assert oldest_val == oldest_key * 10 + + def test_global_ttl_on_insert(self): + obj = self.create_cache(2, 0.5) + assert obj.global_ttl == 0.5 + + obj.insert(0, 1) + time.sleep(0.8) + + with pytest.raises(KeyError): + obj[0] + + obj = self.create_cache(2, 20) + + obj.insert(0, 0) + obj.insert(1, 1) + obj.insert(2, 2) + + assert 0 not in obj + assert (1, 1) == obj.popitem() 
+ + def test_global_ttl_on_update(self): + obj = self.create_cache(2, 0.5) + + # maxsize=2 - (1, 1) should be evicated because + obj.update((i + 1, i + 1) for i in range(3)) + + with pytest.raises(KeyError): + obj[1] + + time.sleep(0.8) + + with pytest.raises(KeyError): + obj[2] + + with pytest.raises(KeyError): + obj[3] + + def test_get_with_expire(self): + obj = self.create_cache(2, 10) + + obj.insert(1, 1) + time.sleep(0.1) + value, dur = obj.get_with_expire(1) + assert 1 == value + assert 10 > dur > 9, "10 > dur > 9 failed [dur: %f]" % dur + + value, dur = obj.get_with_expire("no-exists") + assert value is None + assert 0 == dur + + value, dur = obj.get_with_expire("no-exists", "value") + assert "value" == value + assert 0 == dur + + def test_pop_with_expire(self): + obj = self.create_cache(2, 10) + + obj.insert(1, 1) + time.sleep(0.1) + value, dur = obj.pop_with_expire(1) + assert 1 == value + assert 10 > dur > 9, "10 > dur > 9 failed [dur: %f]" % dur + + value, dur = obj.pop_with_expire("no-exists", None) + assert value is None + assert 0 == dur + + value, dur = obj.pop_with_expire("no-exists", "value") + assert "value" == value + assert 0 == dur + + def test_popitem_with_expire(self): + obj = self.create_cache(2, 10) + + obj.insert(1, 1) + obj.insert(2, 2) + time.sleep(0.1) + key, value, dur = obj.popitem_with_expire() + assert (1, 1) == (key, value) + assert 10 > dur > 9, "10 > dur > 9 failed [dur: %f]" % dur + + key, value, dur = obj.popitem_with_expire() + assert (2, 2) == (key, value) + assert 10 > dur > 9, "10 > dur > 9 failed [dur: %f]" % dur + + with pytest.raises(KeyError): + obj.popitem_with_expire() + + def test_items_with_expire(self): + # no need to test completely items_with_expire + # because it's tested in test_iterators + obj = self.create_cache(10, 3, {1: 2, 3: 4}) + for key, val, ttl in obj.items_with_expire(): + assert key in obj + assert val == obj[key] + assert isinstance(ttl, float) From 1eab960867bfb56da1ec717ce8c177e0ddfaf922 Mon Sep 
17 00:00:00 2001 From: awolverp Date: Mon, 25 May 2026 15:19:35 +0330 Subject: [PATCH 18/60] Optimize performance of TTLCache.expire --- src/policies/ttlpolicy.rs | 9 ++++----- src/pyclasses/ttlcache.rs | 24 ++++++++++++------------ 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/src/policies/ttlpolicy.rs b/src/policies/ttlpolicy.rs index 34e4db5..487fb7b 100644 --- a/src/policies/ttlpolicy.rs +++ b/src/policies/ttlpolicy.rs @@ -334,7 +334,7 @@ impl TTLPolicy { self.front_offset = 0; } - pub fn expire(&mut self, py: pyo3::Python<'_>, shared: &Shared) -> pyo3::PyResult<()> { + pub fn expire(&mut self, shared: &Shared) -> pyo3::PyResult<()> { let now = std::time::SystemTime::now(); while let Some(handle) = self.entries.front() { @@ -342,7 +342,7 @@ impl TTLPolicy { break; } - let eq = |index: &usize| get_handle!(&self, *index).key().py_eq(py, handle.key()); + let eq = |index: &usize| Ok::<_, pyo3::PyErr>((*index - self.front_offset) == 0); if std::hint::unlikely(self.table.remove_entry(handle.key().hash(), eq)?.is_none()) { unreachable!("popitem key not found in table"); } @@ -361,10 +361,9 @@ impl TTLPolicy { #[inline] pub fn iter( &mut self, - py: pyo3::Python<'_>, shared: &Shared, ) -> pyo3::PyResult> { - self.expire(py, shared)?; + self.expire(shared)?; let (first, second) = self.entries.as_slices(); Ok(utils::RawVecDequeIter::new(first, second)) @@ -420,7 +419,7 @@ impl PolicyExt for TTLPolicy { key: &::Key, shared: &'a Self::Shared, ) -> pyo3::PyResult, Self::Vacant<'a>>> { - self.expire(py, shared)?; + self.expire(shared)?; let eq = |index: &usize| get_handle!(&self, *index).key().py_eq(py, key); match self.table.find(key.hash(), eq)? 
{ diff --git a/src/pyclasses/ttlcache.rs b/src/pyclasses/ttlcache.rs index 1a9c869..772ba28 100644 --- a/src/pyclasses/ttlcache.rs +++ b/src/pyclasses/ttlcache.rs @@ -157,19 +157,19 @@ impl PyTTLCache { } #[inline] - fn current_size(&self, py: pyo3::Python) -> pyo3::PyResult { + fn current_size(&self) -> pyo3::PyResult { let inner = self.0.get(); let mut policy = inner.policy(); - policy.expire(py, inner.shared())?; + policy.expire(inner.shared())?; Ok(policy.current_size()) } #[inline] - fn remaining_size(&self, py: pyo3::Python) -> pyo3::PyResult { + fn remaining_size(&self) -> pyo3::PyResult { let inner = self.0.get(); { let mut policy = inner.policy(); - policy.expire(py, inner.shared())?; + policy.expire(inner.shared())?; } Ok(inner.remaining_size()) @@ -555,7 +555,7 @@ impl PyTTLCache { fn items(&self, py: pyo3::Python) -> pyo3::PyResult> { let inner = self.0.get(); - let iter = inner.policy().iter(py, inner.shared())?; + let iter = inner.policy().iter(inner.shared())?; let gv = inner.shared().generation_version().clone(); let initial_gv = gv.get(); @@ -572,7 +572,7 @@ impl PyTTLCache { fn values(&self, py: pyo3::Python) -> pyo3::PyResult> { let inner = self.0.get(); - let iter = inner.policy().iter(py, inner.shared())?; + let iter = inner.policy().iter(inner.shared())?; let gv = inner.shared().generation_version().clone(); let initial_gv = gv.get(); @@ -589,7 +589,7 @@ impl PyTTLCache { fn keys(&self, py: pyo3::Python) -> pyo3::PyResult> { let inner = self.0.get(); - let iter = inner.policy().iter(py, inner.shared())?; + let iter = inner.policy().iter(inner.shared())?; let gv = inner.shared().generation_version().clone(); let initial_gv = gv.get(); @@ -651,12 +651,12 @@ impl PyTTLCache { #[inline] #[pyo3(signature=(*, reuse=false))] - fn expire(&self, py: pyo3::Python, reuse: bool) -> pyo3::PyResult<()> { + fn expire(&self, reuse: bool) -> pyo3::PyResult<()> { let inner = self.0.get(); let shared = inner.shared(); let mut policy = inner.policy(); - 
policy.expire(py, shared)?; + policy.expire(shared)?; if !reuse { policy.shrink_to_fit(shared); @@ -673,7 +673,7 @@ impl PyTTLCache { let inner = self.0.get(); let mut policy = inner.policy(); - policy.expire(py, inner.shared())?; + policy.expire(inner.shared())?; if n < 0 { n += policy.entries().len() as isize; @@ -692,7 +692,7 @@ impl PyTTLCache { let inner = self.0.get(); let mut policy = inner.policy(); - policy.expire(py, inner.shared())?; + policy.expire(inner.shared())?; match policy.entries().back() { Some(handle) => Ok(handle.key().as_ref().clone_ref(py)), @@ -787,7 +787,7 @@ impl PyTTLCache { ) -> pyo3::PyResult> { let inner = self.0.get(); - let iter = inner.policy().iter(py, inner.shared())?; + let iter = inner.policy().iter(inner.shared())?; let gv = inner.shared().generation_version().clone(); let initial_gv = gv.get(); From 904ca7ccedbe87eb84788998b037166fbbdc0da0 Mon Sep 17 00:00:00 2001 From: awolverp Date: Mon, 25 May 2026 15:56:11 +0330 Subject: [PATCH 19/60] Add simple benchmarks in pytest --- .gitignore | 3 +- requirements-dev.txt | 3 + tests/benchmark.py | 81 ++++++++++++++++++++++++++ tests/mixins.py | 135 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 221 insertions(+), 1 deletion(-) create mode 100644 tests/benchmark.py diff --git a/.gitignore b/.gitignore index 6535ef0..2feb865 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,5 @@ __pycache__ /.pytest_cache /htmlcov /backup -/a.py +/.benchmarks +/.hypothesis diff --git a/requirements-dev.txt b/requirements-dev.txt index 9b9e5f9..e05d79c 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,2 +1,5 @@ +maturin pytest hypothesis +pytest-benchmark +pygal diff --git a/tests/benchmark.py b/tests/benchmark.py new file mode 100644 index 0000000..fa07e41 --- /dev/null +++ b/tests/benchmark.py @@ -0,0 +1,81 @@ +import typing + +import cachebox + +from . 
import mixins + + +class TestCache(mixins.BenchmarkMixin): + def create_cache( + self, + maxsize: int = 10, + iterable: typing.Any = None, + capacity: int = 0, + getsizeof: typing.Any = None, + ) -> cachebox.Cache: + return cachebox.Cache(maxsize, iterable, capacity=capacity, getsizeof=getsizeof) + + +class TestFIFOCache(mixins.BenchmarkMixin): + def create_cache( + self, + maxsize: int = 10, + iterable: typing.Any = None, + capacity: int = 0, + getsizeof: typing.Any = None, + ) -> cachebox.FIFOCache: + return cachebox.FIFOCache( + maxsize, iterable, capacity=capacity, getsizeof=getsizeof + ) + + +class TestRRCache(mixins.BenchmarkMixin): + def create_cache( + self, + maxsize: int = 10, + iterable: typing.Any = None, + capacity: int = 0, + getsizeof: typing.Any = None, + ) -> cachebox.RRCache: + return cachebox.RRCache( + maxsize, iterable, capacity=capacity, getsizeof=getsizeof + ) + + +class TestLRUCache(mixins.BenchmarkMixin): + def create_cache( + self, + maxsize: int = 10, + iterable: typing.Any = None, + capacity: int = 0, + getsizeof: typing.Any = None, + ) -> cachebox.LRUCache: + return cachebox.LRUCache( + maxsize, iterable, capacity=capacity, getsizeof=getsizeof + ) + + +class TestLFUCache(mixins.BenchmarkMixin): + def create_cache( + self, + maxsize: int = 10, + iterable: typing.Any = None, + capacity: int = 0, + getsizeof: typing.Any = None, + ) -> cachebox.LFUCache: + return cachebox.LFUCache( + maxsize, iterable, capacity=capacity, getsizeof=getsizeof + ) + + +class TestTTLCache(mixins.BenchmarkMixin): + def create_cache( + self, + maxsize: int = 10, + iterable: typing.Any = None, + capacity: int = 0, + getsizeof: typing.Any = None, + ) -> cachebox.TTLCache: + return cachebox.TTLCache( + maxsize, 10, iterable, capacity=capacity, getsizeof=getsizeof + ) diff --git a/tests/mixins.py b/tests/mixins.py index 7b7ecb5..7dabab9 100644 --- a/tests/mixins.py +++ b/tests/mixins.py @@ -752,3 +752,138 @@ def test_fuzzy_copy_is_independent_of_original( c2 = 
c.copy() c2.insert(new_key, new_value) assert not c.contains(new_key) + + +class BenchmarkMixin(BaseMixin): + @pytest.fixture(autouse=True) + def _set_benchmark_name(self, benchmark, request): + benchmark.name = f"{type(self).__name__}.{request.node.originalname}" + + @pytest.fixture() + def cache(self) -> cachebox.BaseCacheImpl: + return self.create_cache(256) + + @pytest.fixture() + def full_cache(self) -> cachebox.BaseCacheImpl: + """A cache pre-populated to capacity.""" + c = self.create_cache(256) + + for i in range(256): + c.insert(i, i) + + return c + + def test_bench_insert(self, benchmark, cache): + i = 0 + + def run(): + nonlocal i + cache.insert(i % 256, i) + i += 1 + + benchmark.pedantic(run, iterations=1000, rounds=100, warmup_rounds=2) + + def test_bench_update(self, benchmark, cache): + data = {i: i for i in range(64)} + benchmark.pedantic( + cache.update, + args=(data,), + iterations=1000, + rounds=100, + warmup_rounds=2, + ) + + def test_bench_get_hit(self, benchmark, full_cache): + key = 0 + benchmark.pedantic( + full_cache.get, + args=(key,), + iterations=1000, + rounds=100, + warmup_rounds=2, + ) + + def test_bench_get_miss(self, benchmark, cache): + key = 9999 + benchmark.pedantic( + cache.get, + args=(key, None), + iterations=1000, + rounds=100, + warmup_rounds=2, + ) + + def test_bench_getitem(self, benchmark, full_cache): + key = 0 + benchmark.pedantic( + full_cache.__getitem__, + args=(key,), + iterations=1000, + rounds=100, + warmup_rounds=2, + ) + + def test_bench_contains(self, benchmark, full_cache): + key = 0 + benchmark.pedantic( + full_cache.contains, + args=(key,), + iterations=1000, + rounds=100, + warmup_rounds=2, + ) + + def test_bench_pop(self, benchmark): + """Each round gets a fresh cache so pop always finds the key.""" + key = 0 + val = 0 + + def setup(): + c = self.create_cache(256) + c.insert(key, val) + return (c,), {} + + benchmark.pedantic( + lambda c: c.pop(key, None), + setup=setup, + iterations=1, + rounds=1000, + 
warmup_rounds=5, + ) + + def test_bench_popitem(self, benchmark): + """Each round gets a fresh full cache.""" + if isinstance(self.create_cache(0), cachebox.Cache): + pytest.skip("cachebox.Cache not supported this") + + def setup(): + c = self.create_cache(1000) + for i in range(1000): + c.insert(i, i) + + return (c,), {} + + benchmark.pedantic( + lambda c: c.popitem(), + setup=setup, + iterations=1, + rounds=200, + warmup_rounds=5, + ) + + def test_bench_delitem(self, benchmark): + key = 0 + val = 0 + + def setup(): + c = self.create_cache(256) + c.insert(key, val) + return (c,), {} + + benchmark.pedantic( + lambda c: c.__delitem__(key), + setup=setup, + iterations=1, + rounds=200, + warmup_rounds=5, + ) From 578b0d8e343f20b0e97b70dc52ca41496442a2e7 Mon Sep 17 00:00:00 2001 From: awolverp Date: Mon, 25 May 2026 20:27:30 +0330 Subject: [PATCH 20/60] Support background sweeper for TTLCache --- cachebox/__init__.py | 14 ++-- cachebox/_cachebox.py | 164 ++++++++++++++++++++++++++++++++++++++ cachebox/_core.pyi | 100 ++--------------------- src/policies/ttlpolicy.rs | 8 +- src/pyclasses/ttlcache.rs | 57 ++++++------- tests/test_impls.py | 8 ++ 6 files changed, 218 insertions(+), 133 deletions(-) create mode 100644 cachebox/_cachebox.py diff --git a/cachebox/__init__.py b/cachebox/__init__.py index 94495af..a4d4f68 100644 --- a/cachebox/__init__.py +++ b/cachebox/__init__.py @@ -1,10 +1,10 @@ -from ._core import BaseCacheImpl as BaseCacheImpl -from ._core import Cache as Cache -from ._core import FIFOCache as FIFOCache -from ._core import LFUCache as LFUCache -from ._core import LRUCache as LRUCache -from ._core import RRCache as RRCache -from ._core import TTLCache as TTLCache +from ._cachebox import BaseCacheImpl as BaseCacheImpl +from ._cachebox import Cache as Cache +from ._cachebox import FIFOCache as FIFOCache +from ._cachebox import LFUCache as LFUCache +from ._cachebox import LRUCache as LRUCache +from ._cachebox import RRCache as RRCache +from ._cachebox import 
TTLCache as TTLCache try: from ._core import ( diff --git a/cachebox/_cachebox.py b/cachebox/_cachebox.py new file mode 100644 index 0000000..1af8c09 --- /dev/null +++ b/cachebox/_cachebox.py @@ -0,0 +1,164 @@ +import threading +import time +import typing +from datetime import timedelta + +from ._core import BaseCacheImpl as BaseCacheImpl +from ._core import Cache as Cache +from ._core import FIFOCache as FIFOCache +from ._core import LFUCache as LFUCache +from ._core import LRUCache as LRUCache +from ._core import RRCache as RRCache + +# private import +from ._core import TTLCache as _CoreTTLCache + +if typing.TYPE_CHECKING: + from ._core import _IterableType + +KT = typing.TypeVar("KT", bound=typing.Hashable) +VT = typing.TypeVar("VT") + + +class TTLCache(_CoreTTLCache): + """ + A Time-To-Live (TTL) cache eviction policy: each entry carries an expiration timestamp + and is considered stale — and eligible for eviction — once that deadline has passed, + regardless of how recently or frequently it was accessed. + + ## How It Works + The TTL algorithm pairs time-based expiration with insertion-order eviction. Every entry + is stamped with an absolute `expires_at` timestamp at insertion time (computed as + `now + global_ttl`). Entries are stored in insertion order, and eviction proceeds from the + front of that queue — but only after confirming the candidate has actually expired. A live + entry at the front of the queue blocks eviction of everything behind it, so the cache may + temporarily exceed capacity if the oldest entries are still fresh. + + Like `FIFOPolicy`, this implementation backs the queue with a `double-ended queue` for O(1) + front removal and a `hash map` for O(1) key lookups. The same logical-index trick applies: + the table stores monotonically increasing counters rather than physical deque positions, and + a `front_offset` counter converts a logical index back to a physical one at read time via + `entries[table[key] - front_offset]`. 
This keeps eviction and lookup O(1) without rewriting + the table on every eviction. On top of that, every read checks `expires_at` against the current wall-clock time and + treats any expired entry as a cache miss. + + Without `sweep_interval`, an expiry sweep is triggered automatically on every call to + `insert`, `update`, `current_size`, `remaining_size`, `last`, `first`, `items`, `keys`, + `values`, and `__iter__`. A completely idle cache will accumulate stale entries between + these calls, but any normal interaction with the cache is sufficient to reclaim them. + When `sweep_interval` is set, a background thread performs the sweep on that interval + instead, reclaiming expired entries independent of any method calls. + + ### Pros + - Insert, lookup, and evict are all O(1) amortized: the `front_offset` trick eliminates the O(n) + index-shifting that a naïve implementation would require on every eviction. + - Entries expire automatically without any background thread or explicit invalidation call. + Stale data is never returned to the caller. + - TTL expiry and insertion-order eviction compose cleanly: the oldest entry is always evicted + first among those that have already expired. + - A single `global_ttl` keeps configuration simple; every entry ages at the same rate. + + ### Cons + + - Wall-clock dependency. Correctness relies on a monotonically advancing system clock. + Clock adjustments (NTP steps, suspend/resume) can cause entries to expire earlier or later + than intended. + - When `sweep_interval` is set, a background thread wakes on that interval to sweep and + remove all expired entries. This adds a small amount of background CPU usage and + introduces a reaper thread for the lifetime of the cache. + - No per-entry TTL override. All entries share `global_ttl`; mixed expiry requirements need + a different policy or a wrapper layer. 
+ - The rare O(n) index rebase (triggered when `front_offset` nears `usize::MAX - isize::MAX`) + introduces an occasional latency spike. Amortized cost is negligible, but worst-case + latency is unbounded in principle. + + ## When to use it + Reach for `TTLPolicy` when: + - Cached data has a natural freshness window: API responses, auth tokens, DNS records, + rate-limit counters, or any value that becomes incorrect or unsafe after a known interval. + - You need automatic expiry without a background reaper thread — expiry sweeps on common + method calls are sufficient, or you want continuous reclamation via `sweep_interval`. + - Access patterns are unpredictable or uniform enough that recency- or frequency-based + eviction (LRU/LFU) would offer no meaningful advantage. + + Avoid it when: + - Your workload has strong temporal locality and you need a best-effort hit rate policy — + LRU will serve you better. + - Per-entry TTL granularity is required. If different keys need different lifetimes, + consider `VTTLCache`. + - Your environment has an unreliable or adjustable system clock, where wall-clock-based + expiry may behave unexpectedly. + """ + + def __init__( + self, + maxsize: int, + global_ttl: float | timedelta, + iterable: _IterableType[KT, VT] | None = None, + *, + capacity: int = 0, + getsizeof: typing.Callable[[KT, VT]] | None = None, + sweep_interval: float | timedelta | None = None, + ) -> None: + """ + Initialize a new instance. + + Args: + maxsize: Maximum number of elements the cache can hold. If zero, the limit is set to sys.maxsize internally. + global_ttl: Time-to-live for every entry, either as seconds (float) or a timedelta. Applied at insertion time. + iterable: Initial data to populate the cache. + capacity: Pre-allocate cache capacity to minimize reallocations. Defaults to 0. + getsizeof: A callable that computes the size of a key-value pair. When `None`, each + entry is assumed to have a size of 1 (equivalent to `lambda k, v: 1`). 
+ Use this to implement weighted caching — for example, sizing entries by + memory footprint or byte length. + sweep_interval: If set, starts a background thread that sweeps and removes all expired entries on this interval. + When None, expiry is lazy. Defaults to `None`. *It must be at least 1 second*. + + The cache can be pre-sized via `capacity` to reduce reallocations when + the number of expected entries is known ahead of time. + """ + super().__init__( + maxsize, + global_ttl, + iterable, + capacity=capacity, + getsizeof=getsizeof, + ) + + self._thread: threading.Thread | None = None + self._thread_is_running: bool = False + + if sweep_interval is not None: + if isinstance(sweep_interval, timedelta): + sweep_interval = sweep_interval.total_seconds() + + if sweep_interval < 1: + raise ValueError("sweep_interval must be more than 1 seconds.") + + self._thread_is_running = True + self._thread = threading.Thread( + target=self._sweeper_thread, + args=(sweep_interval,), + daemon=True, + ) + self._thread.start() + + self._sweep_interval = sweep_interval + + @property + def sweep_interval(self) -> float | None: + """Returns the configured `sweep_interval`.""" + return self._sweep_interval + + def _sweeper_thread(self, interval: float): + while self._thread_is_running: + time.sleep(interval) + self.expire() + + def stop_sweeper(self) -> None: + """Signals the sweeper thread to stop (if it is active).""" + self._thread_is_running = False + + def __del__(self) -> None: + self.stop_sweeper() diff --git a/cachebox/_core.pyi b/cachebox/_core.pyi index 7c9129e..3010a1d 100644 --- a/cachebox/_core.pyi +++ b/cachebox/_core.pyi @@ -3,6 +3,10 @@ from datetime import timedelta from _typeshed import SupportsItems +KT = typing.TypeVar("KT", bound=typing.Hashable) +VT = typing.TypeVar("VT") +DT = typing.TypeVar("DT") + _IterableType: typing.TypeAlias = ( typing.Dict[KT, VT] | SupportsItems[KT, VT] @@ -10,10 +14,6 @@ _IterableType: typing.TypeAlias = ( | typing.Iterable[typing.Tuple[KT,
VT]] ) -KT = typing.TypeVar("KT") -VT = typing.TypeVar("VT") -DT = typing.TypeVar("DT") - class BaseCacheImpl(typing.Generic[KT, VT]): """ Base implementation for cache classes in the cachebox library. @@ -879,75 +879,6 @@ class LFUCache(BaseCacheImpl[KT, VT]): ... class TTLCache(BaseCacheImpl[KT, VT]): - """ - A Time-To-Live (TTL) cache eviction policy: each entry carries an expiration timestamp - and is considered stale — and eligible for eviction — once that deadline has passed, - regardless of how recently or frequently it was accessed. - - ## How It Works - The TTL algorithm pairs time-based expiration with insertion-order eviction. Every entry - is stamped with an absolute `expires_at` timestamp at insertion time (computed as - `now + global_ttl`). Entries are stored in insertion order, and eviction proceeds from the - front of that queue — but only after confirming the candidate has actually expired. A live - entry at the front of the queue blocks eviction of everything behind it, so the cache may - temporarily exceed capacity if the oldest entries are still fresh. - - Like `FIFOPolicy`, this implementation backs the queue with a `double-ended queue` for O(1) - front removal and a `hash map` for O(1) key lookups. The same logical-index trick applies: - the table stores monotonically increasing counters rather than physical deque positions, and - a `front_offset` counter converts a logical index back to a physical one at read time via - `entries[table[key] - front_offset]`. This keeps eviction and lookup O(1) without rewriting - the table on every eviction. On top of that, every read checks `expires_at` against the current wall-clock time and - treats any expired entry as a cache miss. - - Without `sweep_interval`, an expiry sweep is triggered automatically on every call to - `insert`, `update`, `current_size`, `remaining_size`, `last`, `first`, `items`, `keys`, - `values`, and `__iter__`. 
A completely idle cache will accumulate stale entries between - these calls, but any normal interaction with the cache is sufficient to reclaim them. - When `sweep_interval` is set, a background Rust thread performs the sweep on that interval - instead, reclaiming expired entries independent of any method calls. - - ### Pros - - Insert, lookup, and evict are all O(1) amortized: the `front_offset` trick eliminates the O(n) - index-shifting that a naïve implementation would require on every eviction. - - Entries expire automatically without any background thread or explicit invalidation call. - Stale data is never returned to the caller. - - TTL expiry and insertion-order eviction compose cleanly: the oldest entry is always evicted - first among those that have already expired. - - A single `global_ttl` keeps configuration simple; every entry ages at the same rate. - - ### Cons - - - Wall-clock dependency. Correctness relies on a monotonically advancing system clock. - Clock adjustments (NTP steps, suspend/resume) can cause entries to expire earlier or later - than intended. - - When `sweep_interval` is set, a background Rust thread wakes on that interval to sweep and - remove all expired entries. This adds a small amount of background CPU usage and - introduces a reaper thread for the lifetime of the cache. - - No per-entry TTL override. All entries share `global_ttl`; mixed expiry requirements need - a different policy or a wrapper layer. - - The rare O(n) index rebase (triggered when `front_offset` nears `usize::MAX - isize::MAX`) - introduces an occasional latency spike. Amortized cost is negligible, but worst-case - latency is unbounded in principle. - - ## When to use it - Reach for `TTLPolicy` when: - - Cached data has a natural freshness window: API responses, auth tokens, DNS records, - rate-limit counters, or any value that becomes incorrect or unsafe after a known interval. 
- - You need automatic expiry without a background reaper thread — expiry sweeps on common - method calls are sufficient, or you want continuous reclamation via `sweep_interval`. - - Access patterns are unpredictable or uniform enough that recency- or frequency-based - eviction (LRU/LFU) would offer no meaningful advantage. - - Avoid it when: - - Your workload has strong temporal locality and you need a best-effort hit rate policy — - LRU will serve you better. - - Per-entry TTL granularity is required. If different keys need different lifetimes, - consider `VTTLCache`. - - Your environment has an unreliable or adjustable system clock, where wall-clock-based - expiry may behave unexpectedly. - """ - def __init__( self, maxsize: int, @@ -956,28 +887,7 @@ class TTLCache(BaseCacheImpl[KT, VT]): *, capacity: int = 0, getsizeof: typing.Callable[[KT, VT]] | None = None, - sweep_interval: float | timedelta | None = None, - ) -> None: - """ - Initialize a new instance. - - Args: - maxsize: Maximum number of elements the cache can hold. If zero, the limit is set to sys.maxsize internally. - global_ttl: Time-to-live for every entry, either as seconds (float) or a timedelta. Applied at insertion time. - iterable: Initial data to populate the cache. - capacity: Pre-allocate cache capacity to minimize reallocations. Defaults to 0. - getsizeof: A callable that computes the size of a key-value pair. When `None`, each - entry is assumed to have a size of 1 (equivalent to `lambda k, v: 1`). - Use this to implement weighted caching — for example, sizing entries by - memory footprint or byte length. - sweep_interval: If set, starts a background Rust thread that sweeps and removes all expired entries on this interval. - When None, expiry is lazy. Defaults to `None`. - - The cache can be pre-sized via `capacity` to reduce reallocations when - the number of expected entries is known ahead of time. - """ - ... - + ) -> None: ... 
@property def global_ttl(self) -> float: """Returns the specified `global_ttl`""" diff --git a/src/policies/ttlpolicy.rs b/src/policies/ttlpolicy.rs index 487fb7b..719033b 100644 --- a/src/policies/ttlpolicy.rs +++ b/src/policies/ttlpolicy.rs @@ -334,7 +334,7 @@ impl TTLPolicy { self.front_offset = 0; } - pub fn expire(&mut self, shared: &Shared) -> pyo3::PyResult<()> { + pub fn expire(&mut self, gv: &utils::GenerationVersion) -> pyo3::PyResult<()> { let now = std::time::SystemTime::now(); while let Some(handle) = self.entries.front() { @@ -347,7 +347,7 @@ impl TTLPolicy { unreachable!("popitem key not found in table"); } - shared.generation_version().increment(); + gv.increment(); let front = unsafe { self.entries.pop_front().unwrap_unchecked() }; @@ -363,7 +363,7 @@ impl TTLPolicy { &mut self, shared: &Shared, ) -> pyo3::PyResult> { - self.expire(shared)?; + self.expire(shared.generation_version())?; let (first, second) = self.entries.as_slices(); Ok(utils::RawVecDequeIter::new(first, second)) @@ -419,7 +419,7 @@ impl PolicyExt for TTLPolicy { key: &::Key, shared: &'a Self::Shared, ) -> pyo3::PyResult, Self::Vacant<'a>>> { - self.expire(shared)?; + self.expire(shared.generation_version())?; let eq = |index: &usize| get_handle!(&self, *index).key().py_eq(py, key); match self.table.find(key.hash(), eq)? { diff --git a/src/pyclasses/ttlcache.rs b/src/pyclasses/ttlcache.rs index 772ba28..712da46 100644 --- a/src/pyclasses/ttlcache.rs +++ b/src/pyclasses/ttlcache.rs @@ -28,11 +28,11 @@ implement_pyclass! { /// the table on every eviction. On top of that, every read checks `expires_at` against the current wall-clock time and /// treats any expired entry as a cache miss. 
/// - /// Without `grace_time`, an expiry sweep is triggered automatically on every call to + /// Without `sweep_interval`, an expiry sweep is triggered automatically on every call to /// `insert`, `update`, `current_size`, `remaining_size`, `last`, `first`, `items`, `keys`, /// `values`, and `__iter__`. A completely idle cache will accumulate stale entries between /// these calls, but any normal interaction with the cache is sufficient to reclaim them. - /// When `grace_time` is set, a background Rust thread performs the sweep on that interval + /// When `sweep_interval` is set, a background thread performs the sweep on that interval /// instead, reclaiming expired entries independent of any method calls. /// /// ### Pros @@ -49,7 +49,7 @@ implement_pyclass! { /// - Wall-clock dependency. Correctness relies on a monotonically advancing system clock. /// Clock adjustments (NTP steps, suspend/resume) can cause entries to expire earlier or later /// than intended. - /// - When `grace_time` is set, a background Rust thread wakes on that interval to sweep and + /// - When `sweep_interval` is set, a background thread wakes on that interval to sweep and /// remove all expired entries. This adds a small amount of background CPU usage and /// introduces a reaper thread for the lifetime of the cache. /// - No per-entry TTL override. All entries share `global_ttl`; mixed expiry requirements need @@ -63,7 +63,7 @@ implement_pyclass! { /// - Cached data has a natural freshness window: API responses, auth tokens, DNS records, /// rate-limit counters, or any value that becomes incorrect or unsafe after a known interval. /// - You need automatic expiry without a background reaper thread — expiry sweeps on common - /// method calls are sufficient, or you want continuous reclamation via `grace_time`. + /// method calls are sufficient, or you want continuous reclamation via `sweep_interval`.
/// - Access patterns are unpredictable or uniform enough that recency- or frequency-based /// eviction (LRU/LFU) would offer no meaningful advantage. /// @@ -117,8 +117,6 @@ impl PyTTLCache { capacity: usize, getsizeof: Option, ) -> pyo3::PyResult<()> { - // TODO: support sweep_interval - let global_ttl: f64 = global_ttl.into(); if global_ttl <= 0.0 { return Err(new_py_error!( @@ -131,22 +129,26 @@ impl PyTTLCache { ttlpolicy::Shared::with_ttl(maxsize, getsizeof, Some(global_ttl)) }); - if let Some(iterable) = iterable { - let ttl: ttlpolicy::ExpiresAt = wrapped.shared().global_ttl().unwrap().into(); - let getsizeof = wrapped.shared().getsizeof().clone_ref(py); - - let result = wrapped.extend( - // iterable object - iterable, - // transform function - |key, value| ttlpolicy::ExpiringHandle::new(py, &getsizeof, ttl, key, value), - ); - self.0.set(wrapped); - result - } else { - self.0.set(wrapped); - Ok(()) - } + // Populate cache if `iterable` passed + let extend_result = { + if let Some(iterable) = iterable { + let ttl: ttlpolicy::ExpiresAt = wrapped.shared().global_ttl().unwrap().into(); + let getsizeof = wrapped.shared().getsizeof().clone_ref(py); + + let result = wrapped.extend( + // iterable object + iterable, + // transform function + |key, value| ttlpolicy::ExpiringHandle::new(py, &getsizeof, ttl, key, value), + ); + result + } else { + Ok(()) + } + }; + + self.0.set(wrapped); + extend_result } #[getter] @@ -160,7 +162,7 @@ impl PyTTLCache { fn current_size(&self) -> pyo3::PyResult { let inner = self.0.get(); let mut policy = inner.policy(); - policy.expire(inner.shared())?; + policy.expire(inner.shared().generation_version())?; Ok(policy.current_size()) } @@ -169,7 +171,7 @@ impl PyTTLCache { let inner = self.0.get(); { let mut policy = inner.policy(); - policy.expire(inner.shared())?; + policy.expire(inner.shared().generation_version())?; } Ok(inner.remaining_size()) @@ -611,6 +613,7 @@ impl PyTTLCache { fn copy(&self, py: pyo3::Python) -> pyo3::PyResult> 
{ let inner = self.0.get(); let cloned = inner.clone_ref(py); + let result = Self(onceinit::OnceInit::new(cloned)); pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseCacheImpl)) @@ -656,7 +659,7 @@ impl PyTTLCache { let shared = inner.shared(); let mut policy = inner.policy(); - policy.expire(shared)?; + policy.expire(shared.generation_version())?; if !reuse { policy.shrink_to_fit(shared); @@ -673,7 +676,7 @@ impl PyTTLCache { let inner = self.0.get(); let mut policy = inner.policy(); - policy.expire(inner.shared())?; + policy.expire(inner.shared().generation_version())?; if n < 0 { n += policy.entries().len() as isize; @@ -692,7 +695,7 @@ impl PyTTLCache { let inner = self.0.get(); let mut policy = inner.policy(); - policy.expire(inner.shared())?; + policy.expire(inner.shared().generation_version())?; match policy.entries().back() { Some(handle) => Ok(handle.key().as_ref().clone_ref(py)), diff --git a/tests/test_impls.py b/tests/test_impls.py index 616f9a8..3b06870 100644 --- a/tests/test_impls.py +++ b/tests/test_impls.py @@ -1300,3 +1300,11 @@ def test_items_with_expire(self): assert key in obj assert val == obj[key] assert isinstance(ttl, float) + + def test_sweep_interval(self): + obj = cachebox.TTLCache(10, 3, {1: 1, 2: 2, 3: 3}, sweep_interval=3) + + # __len__ doesn't call expire itself + assert len(obj) == 3 + time.sleep(3.5) + assert len(obj) == 0 From 2210ae5594bbea333b1a27b0051988f7745f29e7 Mon Sep 17 00:00:00 2001 From: awolverp Date: Tue, 26 May 2026 11:46:43 +0330 Subject: [PATCH 21/60] remove unused `py` parameter from eviction methods --- src/policies/fifopolicy.rs | 12 ++++-------- src/policies/lfupolicy.rs | 6 +++--- src/policies/lrupolicy.rs | 6 +++--- src/policies/nopolicy.rs | 6 +++--- src/policies/rrpolicy.rs | 6 +++--- src/policies/traits.rs | 4 ++-- src/policies/ttlpolicy.rs | 8 ++++---- src/policies/wrapped.rs | 6 +++--- src/pyclasses/cache.rs | 4 ++-- src/pyclasses/fifocache.rs | 4 ++-- src/pyclasses/lfucache.rs | 4 ++-- 
src/pyclasses/lrucache.rs | 4 ++-- src/pyclasses/rrcache.rs | 4 ++-- src/pyclasses/ttlcache.rs | 11 ++++------- 14 files changed, 39 insertions(+), 46 deletions(-) diff --git a/src/policies/fifopolicy.rs b/src/policies/fifopolicy.rs index bfd696c..c47f0d0 100644 --- a/src/policies/fifopolicy.rs +++ b/src/policies/fifopolicy.rs @@ -83,8 +83,8 @@ impl traits::VacantExt for Vacant<'_> { } #[inline] - fn evict(&mut self, py: pyo3::Python) -> pyo3::PyResult<()> { - self.policy.evict(py, self.shared)?; + fn evict(&mut self) -> pyo3::PyResult<()> { + self.policy.evict(self.shared)?; Ok(()) } @@ -284,7 +284,7 @@ impl PolicyExt for FIFOPolicy { } } - fn evict(&mut self, py: pyo3::Python, shared: &Self::Shared) -> pyo3::PyResult { + fn evict(&mut self, shared: &Self::Shared) -> pyo3::PyResult { let front = self.entries.front(); if front.is_none() { return Err(new_py_error!(PyKeyError, ())); @@ -292,11 +292,7 @@ impl PolicyExt for FIFOPolicy { let front = unsafe { front.unwrap_unchecked() }; - let eq = |index: &usize| { - self.entries[(*index) - self.front_offset] - .key() - .py_eq(py, front.key()) - }; + let eq = |index: &usize| Ok::<_, pyo3::PyErr>((*index - self.front_offset) == 0); if std::hint::unlikely(self.table.remove_entry(front.key().hash(), eq)?.is_none()) { unreachable!("popitem key not found in table"); } diff --git a/src/policies/lfupolicy.rs b/src/policies/lfupolicy.rs index 7a95a4d..de678a1 100644 --- a/src/policies/lfupolicy.rs +++ b/src/policies/lfupolicy.rs @@ -194,8 +194,8 @@ impl traits::VacantExt for Vacant<'_> { } #[inline] - fn evict(&mut self, py: pyo3::Python) -> pyo3::PyResult<()> { - self.policy.evict(py, self.shared)?; + fn evict(&mut self) -> pyo3::PyResult<()> { + self.policy.evict(self.shared)?; Ok(()) } @@ -337,7 +337,7 @@ impl PolicyExt for LFUPolicy { } } - fn evict(&mut self, _py: pyo3::Python, shared: &Self::Shared) -> pyo3::PyResult { + fn evict(&mut self, shared: &Self::Shared) -> pyo3::PyResult { { let front_cursor = self .heap diff 
--git a/src/policies/lrupolicy.rs b/src/policies/lrupolicy.rs index 6f13243..0d2b2ba 100644 --- a/src/policies/lrupolicy.rs +++ b/src/policies/lrupolicy.rs @@ -72,8 +72,8 @@ impl traits::VacantExt for Vacant<'_> { } #[inline] - fn evict(&mut self, py: pyo3::Python) -> pyo3::PyResult<()> { - self.policy.evict(py, self.shared)?; + fn evict(&mut self) -> pyo3::PyResult<()> { + self.policy.evict(self.shared)?; Ok(()) } @@ -209,7 +209,7 @@ impl PolicyExt for LRUPolicy { } } - fn evict(&mut self, _py: pyo3::Python, shared: &Self::Shared) -> pyo3::PyResult { + fn evict(&mut self, shared: &Self::Shared) -> pyo3::PyResult { { let front_cursor = match self.list.cursor_front() { Some(x) => x, diff --git a/src/policies/nopolicy.rs b/src/policies/nopolicy.rs index 86babb3..08a2ab2 100644 --- a/src/policies/nopolicy.rs +++ b/src/policies/nopolicy.rs @@ -60,8 +60,8 @@ impl traits::VacantExt for Vacant<'_> { } #[inline(always)] - fn evict(&mut self, py: pyo3::Python) -> pyo3::PyResult<()> { - self.policy.evict(py, self.shared)?; + fn evict(&mut self) -> pyo3::PyResult<()> { + self.policy.evict(self.shared)?; Ok(()) } @@ -162,7 +162,7 @@ impl traits::PolicyExt for NoPolicy { } #[inline] - fn evict(&mut self, _py: pyo3::Python, _shared: &Self::Shared) -> pyo3::PyResult { + fn evict(&mut self, _shared: &Self::Shared) -> pyo3::PyResult { Err(new_py_error!( PyOverflowError, "The cache has no algorithm to evict items" diff --git a/src/policies/rrpolicy.rs b/src/policies/rrpolicy.rs index 871d6dd..ab211ba 100644 --- a/src/policies/rrpolicy.rs +++ b/src/policies/rrpolicy.rs @@ -62,8 +62,8 @@ impl traits::VacantExt for Vacant<'_> { } #[inline(always)] - fn evict(&mut self, py: pyo3::Python) -> pyo3::PyResult<()> { - self.policy.evict(py, self.shared)?; + fn evict(&mut self) -> pyo3::PyResult<()> { + self.policy.evict(self.shared)?; Ok(()) } @@ -164,7 +164,7 @@ impl PolicyExt for RRPolicy { } #[inline] - fn evict(&mut self, _py: pyo3::Python, shared: &Self::Shared) -> pyo3::PyResult { + fn 
evict(&mut self, shared: &Self::Shared) -> pyo3::PyResult { if self.table.is_empty() { Err(new_py_error!(PyKeyError, "cache is empty")) } else { diff --git a/src/policies/traits.rs b/src/policies/traits.rs index 81274fa..94cdae4 100644 --- a/src/policies/traits.rs +++ b/src/policies/traits.rs @@ -44,7 +44,7 @@ pub trait VacantExt { /// /// # Errors /// Returns any Python exception raised while dropping the evicted value. - fn evict(&mut self, py: pyo3::Python) -> pyo3::PyResult<()>; + fn evict(&mut self) -> pyo3::PyResult<()>; /// Inserts `handle` into this slot. /// @@ -117,7 +117,7 @@ pub trait PolicyExt { ) -> pyo3::PyResult, Self::Vacant<'a>>>; /// Evicts a handle according to the policy algorithm, returning it. - fn evict(&mut self, py: pyo3::Python, shared: &Self::Shared) -> pyo3::PyResult; + fn evict(&mut self, shared: &Self::Shared) -> pyo3::PyResult; /// Removes all handles without shrinking the allocation. fn clear(&mut self, shared: &Self::Shared); diff --git a/src/policies/ttlpolicy.rs b/src/policies/ttlpolicy.rs index 719033b..16118be 100644 --- a/src/policies/ttlpolicy.rs +++ b/src/policies/ttlpolicy.rs @@ -221,8 +221,8 @@ impl traits::VacantExt for Vacant<'_> { } #[inline] - fn evict(&mut self, py: pyo3::Python) -> pyo3::PyResult<()> { - self.policy.evict(py, self.shared)?; + fn evict(&mut self) -> pyo3::PyResult<()> { + self.policy.evict(self.shared)?; Ok(()) } @@ -241,7 +241,7 @@ impl traits::VacantExt for Vacant<'_> { } pub struct TTLPolicy { - // fields are same as FIFOPolicy + // Fields are same as `FIFOPolicy` table: hashbrown::raw::RawTable, entries: VecDeque, currsize: usize, @@ -441,7 +441,7 @@ impl PolicyExt for TTLPolicy { } } - fn evict(&mut self, _py: pyo3::Python, shared: &Self::Shared) -> pyo3::PyResult { + fn evict(&mut self, shared: &Self::Shared) -> pyo3::PyResult { let front = self.entries.pop_front(); if front.is_none() { return Err(new_py_error!(PyKeyError, "cache is empty")); diff --git a/src/policies/wrapped.rs 
b/src/policies/wrapped.rs index 8966018..7d0c697 100644 --- a/src/policies/wrapped.rs +++ b/src/policies/wrapped.rs @@ -71,7 +71,7 @@ fn insert_inner( PolicyEntry::Vacant(mut vacant) => { // Evict if need while vacant.would_exceed(handle_size) { - vacant.evict(py)?; + vacant.evict()?; } vacant.insert(handle); @@ -82,7 +82,7 @@ fn insert_inner( if result.is_some() { // For the `PolicyEntry::Occupied` case, evict after replacement while lock.current_size() > shared.maxsize() { - lock.evict(py, shared)?; + lock.evict(shared)?; } } @@ -224,7 +224,7 @@ impl Wrapped

{ let mut count: pyo3::ffi::Py_ssize_t = 0; while count < n { - match lock.evict(py, &self.shared) { + match lock.evict(&self.shared) { Ok(_) => {} Err(err) => { if !err.is_instance_of::(py) { diff --git a/src/pyclasses/cache.rs b/src/pyclasses/cache.rs index d6135c7..e80caf4 100644 --- a/src/pyclasses/cache.rs +++ b/src/pyclasses/cache.rs @@ -393,11 +393,11 @@ impl PyCache { /// Remove and return a (key, value) pair as a 2-tuple. /// /// NOTE: `Cache` always raises `NotImplementedError` because has neither policy nor algorithm to evict items. - fn popitem(&self, py: pyo3::Python) -> pyo3::PyResult<(alias::PyObject, alias::PyObject)> { + fn popitem(&self) -> pyo3::PyResult<(alias::PyObject, alias::PyObject)> { let inner = self.0.get(); let mut policy = inner.policy(); - let handle = policy.evict(py, inner.shared())?; + let handle = policy.evict(inner.shared())?; drop(policy); let (key, val) = handle.into_pair(); diff --git a/src/pyclasses/fifocache.rs b/src/pyclasses/fifocache.rs index dc84788..bfc0970 100644 --- a/src/pyclasses/fifocache.rs +++ b/src/pyclasses/fifocache.rs @@ -398,11 +398,11 @@ impl PyFIFOCache { } /// Remove and return a (key, value) pair as a 2-tuple. - fn popitem(&self, py: pyo3::Python) -> pyo3::PyResult<(alias::PyObject, alias::PyObject)> { + fn popitem(&self) -> pyo3::PyResult<(alias::PyObject, alias::PyObject)> { let inner = self.0.get(); let mut policy = inner.policy(); - let handle = policy.evict(py, inner.shared())?; + let handle = policy.evict(inner.shared())?; drop(policy); let (key, val) = handle.into_pair(); diff --git a/src/pyclasses/lfucache.rs b/src/pyclasses/lfucache.rs index 59868e5..24da082 100644 --- a/src/pyclasses/lfucache.rs +++ b/src/pyclasses/lfucache.rs @@ -417,11 +417,11 @@ impl PyLFUCache { } /// Remove and return a (key, value) pair as a 2-tuple. 
- fn popitem(&self, py: pyo3::Python) -> pyo3::PyResult<(alias::PyObject, alias::PyObject)> { + fn popitem(&self) -> pyo3::PyResult<(alias::PyObject, alias::PyObject)> { let inner = self.0.get(); let mut policy = inner.policy(); - let handle = policy.evict(py, inner.shared())?; + let handle = policy.evict(inner.shared())?; drop(policy); let (key, val) = handle.into_pair(); diff --git a/src/pyclasses/lrucache.rs b/src/pyclasses/lrucache.rs index cfcefad..1a10f70 100644 --- a/src/pyclasses/lrucache.rs +++ b/src/pyclasses/lrucache.rs @@ -425,11 +425,11 @@ impl PyLRUCache { } /// Remove and return a (key, value) pair as a 2-tuple. - fn popitem(&self, py: pyo3::Python) -> pyo3::PyResult<(alias::PyObject, alias::PyObject)> { + fn popitem(&self) -> pyo3::PyResult<(alias::PyObject, alias::PyObject)> { let inner = self.0.get(); let mut policy = inner.policy(); - let handle = policy.evict(py, inner.shared())?; + let handle = policy.evict(inner.shared())?; drop(policy); let (key, val) = handle.into_pair(); diff --git a/src/pyclasses/rrcache.rs b/src/pyclasses/rrcache.rs index 30b4039..9e35f12 100644 --- a/src/pyclasses/rrcache.rs +++ b/src/pyclasses/rrcache.rs @@ -398,11 +398,11 @@ impl PyRRCache { /// Remove and return a (key, value) pair as a 2-tuple. /// /// NOTE: `Cache` always raises `NotImplementedError` because has neither policy nor algorithm to evict items. 
- fn popitem(&self, py: pyo3::Python) -> pyo3::PyResult<(alias::PyObject, alias::PyObject)> { + fn popitem(&self) -> pyo3::PyResult<(alias::PyObject, alias::PyObject)> { let inner = self.0.get(); let mut policy = inner.policy(); - let handle = policy.evict(py, inner.shared())?; + let handle = policy.evict(inner.shared())?; drop(policy); let (key, val) = handle.into_pair(); diff --git a/src/pyclasses/ttlcache.rs b/src/pyclasses/ttlcache.rs index 712da46..3587a22 100644 --- a/src/pyclasses/ttlcache.rs +++ b/src/pyclasses/ttlcache.rs @@ -462,11 +462,11 @@ impl PyTTLCache { } /// Remove and return a (key, value) pair as a 2-tuple. - fn popitem(&self, py: pyo3::Python) -> pyo3::PyResult<(alias::PyObject, alias::PyObject)> { + fn popitem(&self) -> pyo3::PyResult<(alias::PyObject, alias::PyObject)> { let inner = self.0.get(); let mut policy = inner.policy(); - let handle = policy.evict(py, inner.shared())?; + let handle = policy.evict(inner.shared())?; drop(policy); let (key, val) = handle.into_pair(); @@ -765,14 +765,11 @@ impl PyTTLCache { } } - fn popitem_with_expire( - &self, - py: pyo3::Python, - ) -> pyo3::PyResult<(alias::PyObject, alias::PyObject, f64)> { + fn popitem_with_expire(&self) -> pyo3::PyResult<(alias::PyObject, alias::PyObject, f64)> { let inner = self.0.get(); let mut policy = inner.policy(); - let handle = policy.evict(py, inner.shared())?; + let handle = policy.evict(inner.shared())?; drop(policy); let dur = handle From ff52f60dfdb9d70ffa8a918e974b4e7a07711a0b Mon Sep 17 00:00:00 2001 From: awolverp Date: Tue, 26 May 2026 12:26:37 +0330 Subject: [PATCH 22/60] Use `u128` instead of `usize` in LFU frequency counter to support more range. 
--- src/internal/linked_list.rs | 1 + src/policies/fifopolicy.rs | 2 ++ src/policies/lfupolicy.rs | 10 +++++----- src/policies/ttlpolicy.rs | 1 + 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/internal/linked_list.rs b/src/internal/linked_list.rs index f9d0da1..fc48941 100644 --- a/src/internal/linked_list.rs +++ b/src/internal/linked_list.rs @@ -2,6 +2,7 @@ use std::marker::PhantomData; use std::mem; use std::ptr::NonNull; +/// [`LinkedList`]'s node pub struct Node { next: Option>>, prev: Option>>, diff --git a/src/policies/fifopolicy.rs b/src/policies/fifopolicy.rs index c47f0d0..6c333f2 100644 --- a/src/policies/fifopolicy.rs +++ b/src/policies/fifopolicy.rs @@ -10,6 +10,7 @@ use crate::policies::traits::SharedExt; pub use super::common::Handle; pub use super::common::Shared; +/// Shorthand for `self.entries[index - self.front_offset]` macro_rules! get_handle { (&$slf:expr, $index:expr) => { &$slf.entries[$index - $slf.front_offset] @@ -162,6 +163,7 @@ impl FIFOPolicy { #[cfg(not(feature = "small-offset"))] const MAX_FRONT_OFFSET: usize = usize::MAX - isize::MAX as usize; + // Use u8::MAX as maximum front offset, useful for tests #[cfg(feature = "small-offset")] const MAX_FRONT_OFFSET: usize = u8::MAX as usize; diff --git a/src/policies/lfupolicy.rs b/src/policies/lfupolicy.rs index de678a1..1f05f74 100644 --- a/src/policies/lfupolicy.rs +++ b/src/policies/lfupolicy.rs @@ -11,7 +11,7 @@ pub use crate::policies::common::Shared; #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] #[repr(transparent)] -pub struct Frequency(usize); +pub struct Frequency(u128); impl Frequency { #[inline(always)] @@ -37,7 +37,7 @@ impl FrequencyHandle { key: alias::PyObject, value: alias::PyObject, // initial frequency - frequency: usize, + frequency: u128, ) -> pyo3::PyResult { Self::with_precomputed_hash_key( py, @@ -56,7 +56,7 @@ impl FrequencyHandle { key: utils::PrecomputedHashObject, value: alias::PyObject, // initial frequency - frequency: usize, + 
frequency: u128, ) -> pyo3::PyResult { let size = getsizeof.call(py, key.as_ref(), &value)?; Ok(Self { @@ -69,7 +69,7 @@ impl FrequencyHandle { /// Returns the frequency. #[inline] - pub fn frequency(&self) -> usize { + pub fn frequency(&self) -> u128 { self.frequency.0 } @@ -314,7 +314,7 @@ impl PolicyExt for LFUPolicy { key: &::Key, shared: &'a Self::Shared, ) -> pyo3::PyResult, Self::Vacant<'a>>> { - let eq = |cursor: &lazyheap::Cursor| unsafe { + let eq = |cursor: &lazyheap::Cursor| unsafe { key.py_eq(py, cursor.element().key()) }; diff --git a/src/policies/ttlpolicy.rs b/src/policies/ttlpolicy.rs index 16118be..e0d2fb6 100644 --- a/src/policies/ttlpolicy.rs +++ b/src/policies/ttlpolicy.rs @@ -483,6 +483,7 @@ impl PolicyExt for TTLPolicy { self.front_offset = 0; } + // TODO: considering expired handles fn py_eq( &self, py: pyo3::Python, From ae5dfc651d52c1f0b72209ab9397112834255e41 Mon Sep 17 00:00:00 2001 From: awolverp Date: Tue, 26 May 2026 13:01:09 +0330 Subject: [PATCH 23/60] Update documentations to use Google Style. --- cachebox/_cachebox.py | 174 +++--- cachebox/_core.pyi | 1197 ++++++++++++++++++++++-------------- src/policies/lfupolicy.rs | 8 +- src/policies/traits.rs | 2 - src/pyclasses/cache.rs | 34 +- src/pyclasses/fifocache.rs | 36 +- src/pyclasses/lfucache.rs | 38 +- src/pyclasses/lrucache.rs | 34 +- src/pyclasses/rrcache.rs | 34 +- src/pyclasses/ttlcache.rs | 2 +- src/typeref.rs | 4 + 11 files changed, 945 insertions(+), 618 deletions(-) diff --git a/cachebox/_cachebox.py b/cachebox/_cachebox.py index 1af8c09..ada5cdb 100644 --- a/cachebox/_cachebox.py +++ b/cachebox/_cachebox.py @@ -22,72 +22,73 @@ class TTLCache(_CoreTTLCache): """ - A Time-To-Live (TTL) cache eviction policy: each entry carries an expiration timestamp - and is considered stale — and eligible for eviction — once that deadline has passed, - regardless of how recently or frequently it was accessed. 
- - ## How It Works - The TTL algorithm pairs time-based expiration with insertion-order eviction. Every entry - is stamped with an absolute `expires_at` timestamp at insertion time (computed as - `now + global_ttl`). Entries are stored in insertion order, and eviction proceeds from the - front of that queue — but only after confirming the candidate has actually expired. A live - entry at the front of the queue blocks eviction of everything behind it, so the cache may - temporarily exceed capacity if the oldest entries are still fresh. - - Like `FIFOPolicy`, this implementation backs the queue with a `double-ended queue` for O(1) - front removal and a `hash map` for O(1) key lookups. The same logical-index trick applies: - the table stores monotonically increasing counters rather than physical deque positions, and - a `front_offset` counter converts a logical index back to a physical one at read time via - `entries[table[key] - front_offset]`. This keeps eviction and lookup O(1) without rewriting - the table on every eviction. On top of that, every read checks `expires_at` against the current wall-clock time and - treats any expired entry as a cache miss. - - Without `sweep_interval`, an expiry sweep is triggered automatically on every call to - `insert`, `update`, `current_size`, `remaining_size`, `last`, `first`, `items`, `keys`, - `values`, and `__iter__`. A completely idle cache will accumulate stale entries between - these calls, but any normal interaction with the cache is sufficient to reclaim them. - When `sweep_interval` is set, a background thread performs the sweep on that interval - instead, reclaiming expired entries independent of any method calls. - - ### Pros - - Insert, lookup, and evict are all O(1) amortized: the `front_offset` trick eliminates the O(n) - index-shifting that a naïve implementation would require on every eviction. - - Entries expire automatically without any background thread or explicit invalidation call. 
- Stale data is never returned to the caller. - - TTL expiry and insertion-order eviction compose cleanly: the oldest entry is always evicted - first among those that have already expired. - - A single `global_ttl` keeps configuration simple; every entry ages at the same rate. - - ### Cons - - - Wall-clock dependency. Correctness relies on a monotonically advancing system clock. - Clock adjustments (NTP steps, suspend/resume) can cause entries to expire earlier or later - than intended. - - When `sweep_interval` is set, a background thread wakes on that interval to sweep and - remove all expired entries. This adds a small amount of background CPU usage and - introduces a reaper thread for the lifetime of the cache. - - No per-entry TTL override. All entries share `global_ttl`; mixed expiry requirements need - a different policy or a wrapper layer. - - The rare O(n) index rebase (triggered when `front_offset` nears `usize::MAX - isize::MAX`) - introduces an occasional latency spike. Amortized cost is negligible, but worst-case - latency is unbounded in principle. - - ## When to use it - Reach for `TTLPolicy` when: - - Cached data has a natural freshness window: API responses, auth tokens, DNS records, - rate-limit counters, or any value that becomes incorrect or unsafe after a known interval. - - You need automatic expiry without a background reaper thread — expiry sweeps on common - method calls are sufficient, or you want continuous reclamation via `sweep_interval`. - - Access patterns are unpredictable or uniform enough that recency- or frequency-based - eviction (LRU/LFU) would offer no meaningful advantage. - - Avoid it when: - - Your workload has strong temporal locality and you need a best-effort hit rate policy — - LRU will serve you better. - - Per-entry TTL granularity is required. If different keys need different lifetimes, - consider `VTTLCache`. 
- - Your environment has an unreliable or adjustable system clock, where wall-clock-based - expiry may behave unexpectedly. + A cache with a Time-To-Live (TTL) eviction policy. + + Each entry carries an expiration timestamp and is considered stale — and + eligible for eviction — once that deadline has passed, regardless of how + recently or frequently it was accessed. + + Every entry is stamped with an absolute ``expires_at`` timestamp at + insertion time (computed as ``now + global_ttl``). Entries are stored in + insertion order and eviction proceeds from the front of that queue, but + only after confirming the candidate has actually expired. A live entry at + the front of the queue blocks eviction of everything behind it, so the + cache may temporarily exceed capacity if the oldest entries are still + fresh. + + Like ``FIFOCache``, this implementation backs the queue with a + double-ended queue for O(1) front removal and a hash map for O(1) key + lookups. The same logical-index trick applies: the table stores + monotonically increasing counters rather than physical deque positions, and + a ``front_offset`` counter converts a logical index back to a physical one + at read time via ``entries[table[key] - front_offset]``. This keeps + eviction and lookup O(1) without rewriting the table on every eviction. + Every read also checks ``expires_at`` against the current wall-clock time + and treats any expired entry as a cache miss. + + Without ``sweep_interval``, an expiry sweep is triggered automatically on + every call to ``insert``, ``update``, ``current_size``, ``remaining_size``, + ``last``, ``first``, ``items``, ``keys``, ``values``, and ``__iter__``. A + completely idle cache will accumulate stale entries between these calls, + but any normal interaction is sufficient to reclaim them. When + ``sweep_interval`` is set, a background thread performs the sweep on that + interval instead, reclaiming expired entries independent of method calls. 
+ + Pros: + - Insert, lookup, and evict are all O(1) amortized: the + ``front_offset`` trick eliminates the O(n) index-shifting that a + naive implementation would require on every eviction. + - Entries expire automatically without a background thread or explicit + invalidation call; stale data is never returned to the caller. + - TTL expiry and insertion-order eviction compose cleanly: the oldest + expired entry is always evicted first. + - A single ``global_ttl`` keeps configuration simple; every entry ages + at the same rate. + + Cons: + - Wall-clock dependency: correctness relies on a monotonically + advancing system clock. Clock adjustments (NTP steps, + suspend/resume) can cause entries to expire earlier or later than + intended. + - When ``sweep_interval`` is set, a background thread wakes on that + interval to remove all expired entries, adding a small amount of + background CPU usage for the lifetime of the cache. + - No per-entry TTL override: all entries share ``global_ttl``; mixed + expiry requirements need a different policy or a wrapper layer. + - A rare O(n) index rebase (triggered when ``front_offset`` nears + ``usize::MAX - isize::MAX``) introduces an occasional latency spike; + amortised cost is negligible but worst-case latency is unbounded in + principle. + + Use ``TTLCache`` when cached data has a natural freshness window (API + responses, auth tokens, DNS records, rate-limit counters), when automatic + expiry without a background reaper is sufficient, or when access patterns + are unpredictable enough that recency- or frequency-based eviction would + offer no meaningful advantage. + + Avoid it when strong temporal locality makes LRU a better fit, when + per-entry TTL granularity is required (consider ``VTTLCache`` instead), or + when the system clock is unreliable or subject to adjustment. """ def __init__( @@ -101,22 +102,33 @@ def __init__( sweep_interval: float | timedelta | None = None, ) -> None: """ - Initialize a new instance. 
+ Initializes a new TTLCache instance. Args: - maxsize: Maximum number of elements the cache can hold. If zero, the limit is set to sys.maxsize internally. - global_ttl: Time-to-live for every entry, either as seconds (float) or a timedelta. Applied at insertion time. + maxsize: Maximum number of elements the cache can hold. If zero, + the limit is set to ``sys.maxsize`` internally. + global_ttl: Time-to-live for every entry, as seconds (float) or a + ``timedelta``. Applied at insertion time. iterable: Initial data to populate the cache. - capacity: Pre-allocate cache capacity to minimize reallocations. Defaults to 0. - getsizeof: A callable that computes the size of a key-value pair. When `None`, each - entry is assumed to have a size of 1 (equivalent to `lambda k, v: 1`). - Use this to implement weighted caching — for example, sizing entries by - memory footprint or byte length. - sweep_interval: If set, starts a background thread that sweeps and removes all expired entries on this interval. - When None, expiry is lazy. Defaults to `None`. *It should be more than 1*. - - The cache can be pre-sized via `capacity` to reduce reallocations when - the number of expected entries is known ahead of time. + capacity: Pre-allocate cache capacity to minimize reallocations. + Defaults to 0. + getsizeof: A callable that computes the size of a key-value pair. + When ``None``, each entry is assumed to have a size of 1 + (equivalent to ``lambda k, v: 1``). Use this to implement + weighted caching — for example, sizing entries by memory + footprint or byte length. + sweep_interval: If set, starts a background thread that sweeps and + removes all expired entries on this interval (in seconds or as + a ``timedelta``). When ``None``, expiry is lazy. Defaults to + ``None``. Must be greater than or equal to 1. + + Note: + The cache can be pre-sized via ``capacity`` to reduce + reallocations when the number of expected entries is known + ahead of time. 
+ + Raises: + ValueError: If ``sweep_interval`` is set to a value less than 1. """ super().__init__( maxsize, @@ -148,7 +160,7 @@ def __init__( @property def sweep_interval(self) -> float | None: - """Returns the configured `sweep_interval`.""" + """The configured ``sweep_interval`` in seconds.""" return self._sweep_interval def _sweeper_thread(self, interval: float): @@ -157,7 +169,7 @@ def _sweeper_thread(self, interval: float): self.expire() def stop_sweeper(self) -> None: - """Signals the sweeper thread to stop ( if is active )""" + """Signals the background sweeper thread to stop, if one is active.""" self._thread_is_running = False def __del__(self) -> None: diff --git a/cachebox/_core.pyi b/cachebox/_core.pyi index 3010a1d..fd98568 100644 --- a/cachebox/_core.pyi +++ b/cachebox/_core.pyi @@ -16,62 +16,96 @@ _IterableType: typing.TypeAlias = ( class BaseCacheImpl(typing.Generic[KT, VT]): """ - Base implementation for cache classes in the cachebox library. + Base implementation for cache classes. - This abstract base class defines the generic structure for cache implementations, - supporting different key and value types through generic type parameters. - Serves as a foundation for specific cache variants like Cache and FIFOCache. + This abstract base class defines the generic structure for cache + implementations. """ + def __new__(cls, *args, **kwds) -> typing.Self: + """ + Allocates memory and returns an uninitialized instance. + + Warning: + Using the returned instance before calling ``__init__`` is unsafe + and causes panic errors. + """ + ... + def __init__( self, maxsize: int, iterable: _IterableType[KT, VT] | None = None, *, capacity: int = 0, - getsizeof: typing.Callable[[KT, VT]] | None = None, + getsizeof: typing.Callable[[KT, VT], int] | None = None, ) -> None: """ - Initialize a new instance. + Initializes a new instance. Args: - maxsize: Maximum number of elements the cache can hold. If zero, the limit is set to sys.maxsize internally. 
+ maxsize: Maximum number of elements the cache can hold. If zero, + the limit is set to ``sys.maxsize`` internally. iterable: Initial data to populate the cache. - capacity: Pre-allocate cache capacity to minimize reallocations. Defaults to 0. - getsizeof: A callable that computes the size of a key-value pair. When `None`, each - entry is assumed to have a size of 1 (equivalent to `lambda k, v: 1`). - Use this to implement weighted caching — for example, sizing entries by - memory footprint or byte length. + capacity: Pre-allocate cache capacity to minimize reallocations. + Defaults to 0. + getsizeof: A callable that computes the size of a key-value pair. + When ``None``, each entry is assumed to have a size of 1 + (equivalent to ``lambda k, v: 1``). Use this to implement + weighted caching — for example, sizing entries by memory + footprint or byte length. - The cache can be pre-sized via `capacity` to reduce reallocations when - the number of expected entries is known ahead of time. + Note: + The cache can be pre-sized via ``capacity`` to reduce + reallocations when the number of expected entries is known + ahead of time. """ ... @property def maxsize(self) -> int: - """Returns the specified `maxsize`""" + """int: The configured ``maxsize``.""" ... @property - def getsizeof(self) -> typing.Callable[[KT, VT]] | None: - """Returns the `getsizeof` function""" + def getsizeof(self) -> typing.Callable[[KT, VT], int] | None: + """Callable or None: The configured ``getsizeof`` function.""" ... def current_size(self) -> int: - """Returns the current total cumulative size consumed by all stored entries.""" + """ + Returns the current total cumulative size of all stored entries. + + Returns: + The sum of sizes of all entries currently in the cache. + """ ... def remaining_size(self) -> int: - """Returns the remaining size. Equals to `maxsize - current_size`""" + """ + Returns the remaining available size. + + Returns: + The result of ``maxsize - current_size``. + """ ... 
def capacity(self) -> int: - """Returns the number of elements the map can hold without reallocating.""" + """ + Returns the number of elements the map can hold without reallocating. + + Returns: + The current allocated capacity. + """ ... def __len__(self) -> int: - """Returns the number of entries currently in the cache.""" + """ + Returns the number of entries currently in the cache. + + Returns: + The number of entries in the cache. + """ ... def __sizeof__(self) -> int: ... @@ -79,19 +113,35 @@ class BaseCacheImpl(typing.Generic[KT, VT]): def __contains__(self, key: KT) -> bool: ... def contains(self, key: KT) -> bool: """ - Returns `true` if the cache contains an entry for `key`. Equals to `key in self`. + Returns ``True`` if the cache contains an entry for ``key``. + + Equivalent to ``key in self``. Prefer this method over ``key in self`` + to keep code compatible across different cache policies. + + Args: + key: The key to look up. - It's recommended to use this method instead of `key in self`, as it keeps code - compatible across different cache policies. + Returns: + ``True`` if the key exists in the cache, ``False`` otherwise. """ ... def is_empty(self) -> bool: - """Returns `True` if cache is empty. Exactly like `bool(self)`.""" + """ + Returns ``True`` if the cache is empty. + + Returns: + ``True`` if the cache contains no entries. + """ ... def is_full(self) -> bool: - """Returns `True` when the cumulative size has reached the maxsize limit.""" + """ + Returns ``True`` when the cumulative size has reached the maxsize limit. + + Returns: + ``True`` if the cache is at capacity. + """ ... def insert( @@ -117,9 +167,17 @@ class BaseCacheImpl(typing.Generic[KT, VT]): ) -> typing.Optional[VT | DT]: ... def pop(self, key: KT, default: DT = ...) -> typing.Union[VT, DT]: """ - Removes specified key and returns the corresponding value. + Removes the specified key and returns the corresponding value. + + Args: + key: The key to remove. 
+ default: Value to return if the key is not found. - If the key is not found, returns the `default` if given; otherwise, raise a KeyError. + Returns: + The value associated with ``key``, or ``default`` if not found. + + Raises: + KeyError: If the key is not found and no ``default`` is provided. """ ... @@ -127,7 +185,13 @@ class BaseCacheImpl(typing.Generic[KT, VT]): def popitem(self) -> typing.Tuple[KT, VT]: ... def drain(self, n: int) -> int: """ - Calls the `popitem()` `n` times and returns count of removed items. + Calls ``popitem()`` ``n`` times and returns the count of removed items. + + Args: + n: The number of items to remove. + + Returns: + The number of items successfully removed. """ ... @@ -137,9 +201,11 @@ class BaseCacheImpl(typing.Generic[KT, VT]): def clear(self, *, reuse: bool = False) -> None: """ - Removes all items from cache. + Removes all items from the cache. - If `reuse` is True, will not free the memory for reusing in the future. + Args: + reuse: If ``True``, retains the allocated memory for future reuse + rather than freeing it. Defaults to ``False``. """ ... @@ -156,62 +222,73 @@ class BaseCacheImpl(typing.Generic[KT, VT]): class Cache(BaseCacheImpl[KT, VT]): """ A thread-safe, memory-efficient key-value cache with no eviction policy. - items remain in the cache until manually removed or the cache is cleared. - - ## How It Works - `Cache` is essentially a configurable hashmap-like store. When an item is inserted: - - It is stored directly without any ordering, priority tracking, or access metadata. - - If a maximum size is configured, insertions beyond that limit are rejected (raises OverflowError). - - All read and write operations are thread-safe, making it safe for concurrent access without - external locking. - - Because no eviction logic runs in the background, there is no overhead from tracking usage order, - frequency counters, or expiry timestamps. 
- - ### Pros - - Minimal overhead - no bookkeeping for eviction means lower CPU and memory usage per entry compared - to policy-based caches. - - Predictable behavior - items are never silently removed, so cache hits are deterministic once an - item is stored. - - Thread-safe - safe for concurrent reads and writes out of the box. - - Configurable capacity - a hard size limit prevents unbounded memory growth. - - ### Cons - - No automatic eviction - the cache can fill up and stop accepting new entries if a max size is set, - requiring manual management. - - Unordered - unlike a standard dict (Python 3.7+), insertion order is not preserved. - - Not suitable for volatile data - stale entries persist forever unless explicitly invalidated. - - ## When to Use It - `Cache` is the right choice when: - - You have a fixed, well-known set of keys that are expensive to compute and never go stale - (e.g., parsed config values, compiled regex patterns, loaded templates). - - The cached data has no meaningful expiry - it's either always valid or always explicitly invalidated. - - You need the lowest possible overhead and can guarantee the cache won't grow uncontrollably. - - Avoid it when cached data can become stale, when the working set is unpredictable in size, or when you need automatic - memory pressure relief. + + Items remain in the cache until manually removed or the cache is cleared. + + ``Cache`` is essentially a configurable hashmap-like store. When an item is + inserted, it is stored directly without any ordering, priority tracking, or + access metadata. If a maximum size is configured, insertions beyond that + limit are rejected with an ``OverflowError``. All read and write operations + are thread-safe. + + Because no eviction logic runs in the background, there is no overhead from + tracking usage order, frequency counters, or expiry timestamps. 
+ + Pros: + - Minimal overhead: no bookkeeping for eviction means lower CPU and + memory usage per entry compared to policy-based caches. + - Predictable behavior: items are never silently removed, so cache hits + are deterministic once an item is stored. + - Thread-safe: safe for concurrent reads and writes out of the box. + - Configurable capacity: a hard size limit prevents unbounded memory + growth. + + Cons: + - No automatic eviction: the cache can fill up and stop accepting new + entries if a max size is set, requiring manual management. + - Unordered: unlike a standard ``dict`` (Python 3.7+), insertion order + is not preserved. + - Not suitable for volatile data: stale entries persist forever unless + explicitly invalidated. + + Use ``Cache`` when you have a fixed, well-known set of keys that are + expensive to compute and never go stale (e.g. parsed config values, + compiled regex patterns, loaded templates), and when the lowest possible + overhead is required. + + Avoid it when cached data can become stale, when the working set is + unpredictable in size, or when automatic memory pressure relief is needed. """ def insert(self, key: KT, value: VT) -> typing.Optional[VT]: """ - Equals to `self[key] = value`, but returns a value: + Inserts a key-value pair and returns the previous value if present. + + Equivalent to ``self[key] = value``, but returns a value. Prefer this + method over direct assignment to keep code compatible across different + cache policies. - - If the cache did not have this key present, None is returned. - - If the cache did have this key present, the value is updated, - and the old value is returned. The key is not updated, though; + Args: + key: The key to insert or update. + value: The value to associate with ``key``. - It's recommended to use this method instead of `self[key] = value`, as it keeps code - compatible across different cache policies. 
+ Returns: + ``None`` if the key was not previously present; the old value if + the key already existed (the key itself is not updated). - Note: raises `OverflowError` if the cache reached the maxsize limit, - because this class does not have any algorithm. + Raises: + OverflowError: If the cache has reached its ``maxsize`` limit, + since this class has no eviction algorithm. """ ... def update(self, iterable: _IterableType[KT, VT]) -> None: """ - Updates the cache with elements from a dictionary or an iterable object of key/value pairs. + Updates the cache with elements from a dictionary or iterable of key-value pairs. + + Args: + iterable: A dictionary, object supporting ``items()``, another + cache instance, or an iterable of ``(key, value)`` tuples. """ ... @@ -223,8 +300,12 @@ class Cache(BaseCacheImpl[KT, VT]): """ Retrieves the value for a given key from the cache. - Returns the value associated with the key if present, otherwise returns the specified default value. - Equivalent to `self[key]`, but provides a fallback default if the key is not found. + Args: + key: The key to look up. + default: Value to return if the key is not found. + + Returns: + The value associated with ``key``, or ``default`` if not found. """ ... @@ -234,109 +315,131 @@ class Cache(BaseCacheImpl[KT, VT]): default: typing.Optional[DT] = None, ) -> typing.Optional[VT | DT]: """ - Inserts key with a value of default if key is not in the cache. + Inserts ``key`` with ``default`` as its value if the key is absent. - Returns the value for key if key is in the cache, else default. + Args: + key: The key to look up or insert. + default: The value to insert if ``key`` is not in the cache. + Defaults to ``None``. + + Returns: + The existing value if ``key`` is present, otherwise ``default``. """ ... def popitem(self) -> typing.Tuple[KT, VT]: - """Always raises `OverflowError` because `Cache` has neither policy nor algorithm to evict items.""" + """ + Always raises ``OverflowError``. 
+ + ``Cache`` has no policy or algorithm to select an item for eviction. + + Raises: + OverflowError: Always, because ``Cache`` has no eviction policy. + """ ... def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: """ - Returns an iterable object of the cache's items (key-value pairs). + Returns an iterable of the cache's ``(key, value)`` pairs. - Notes: - - You should not make any changes in cache while using this iterable object. - - Items are not ordered. + Warning: + Do not modify the cache while iterating. Items are not ordered. + + Returns: + An iterable of ``(key, value)`` tuples. """ ... def keys(self) -> typing.Iterable[KT]: """ - Returns an iterable object of the cache's keys. + Returns an iterable of the cache's keys. + + Warning: + Do not modify the cache while iterating. Keys are not ordered. - Notes: - - You should not make any changes in cache while using this iterable object. - - Keys are not ordered. + Returns: + An iterable of keys. """ ... def values(self) -> typing.Iterable[VT]: """ - Returns an iterable object of the cache's values. + Returns an iterable of the cache's values. + + Warning: + Do not modify the cache while iterating. Values are not ordered. - Notes: - - You should not make any changes in cache while using this iterable object. - - Values are not ordered. + Returns: + An iterable of values. """ ... class FIFOCache(BaseCacheImpl[KT, VT]): """ - A First-In-First-Out (FIFO) cache eviction policy: when the cache is full, the oldest - inserted item is always the first to be removed, regardless of how often it has been accessed. - - ## How It Works - The FIFO algorithm is one of the simplest cache eviction strategies. Items are stored in - insertion order, and when the cache reaches capacity, the item that has been there the - longest is evicted to make room. There is no concept of "recently used" or "frequently used" - - age alone determines eviction order. 
Conceptually, it behaves like a queue: new items - join the back, and evictions come from the front. - - This implementation backs that queue with a `double-ended queue` for O(1) front removal, - paired with a `hash map` for O(1) key lookups. Rather than storing physical indices into - the deque (which shift every time an item is evicted from the front), the table stores - logical indices - a monotonically increasing counter assigned at insertion time. - A separate `front_offset` counter tracks how many items have ever been evicted; the physical - position of any key is recovered at read time as `entries[table[key] - front_offset]`, - keeping both eviction and lookup O(1) without any per-eviction rewriting of the table. - - ### Pros - - Insert, lookup, and evict are all O(1) amortized: the `front_offset` trick eliminates the O(n) - index-shifting that a native implementation would require on every eviction. - - Eviction order is fully deterministic: the oldest item always goes first, independent of access - patterns, making behaviour easy to reason about and reproduce in tests. - - No per-read overhead. Unlike LRU, FIFO requires no bookkeeping on cache hits. - - ### Cons - - Access-blind eviction. A hot item accessed thousands of times is evicted just as readily as one - that has never been read. Hit rates suffer on workloads with strong temporal locality. - - The logical-index indirection adds a layer of internal complexity compared to a naïve queue-based cache. - - The rare O(n) index rebase (triggered when `front_offset` nears `usize::MAX - isize::MAX`) introduces - an occasional latency spike. Amortized cost is negligible, but worst-case latency is unbounded in principle. - - ## When to use it - Reach for `FIFOPolicy` when: - - Eviction order must be predictable and auditable: streaming pipelines, sequential batch processors, or - any context where deterministic behaviour simplifies debugging. 
- - Access patterns are roughly uniform, so there is no meaningful "hot" subset of keys that a recency or - frequency-aware policy could exploit. - - Read overhead must be minimal: FIFO's zero-cost hits make it preferable to LRU in insert-heavy workloads - with infrequent re-reads. - - Avoid it when your workload has strong temporal locality. If recently or frequently accessed items are likely - to be needed again soon, an LRU or LFU policy will deliver meaningfully better hit rates. + A cache with a First-In-First-Out (FIFO) eviction policy. + + When the cache is full, the oldest inserted item is always the first to be + removed, regardless of how often it has been accessed. + + Items are stored in insertion order. When capacity is reached, the item + that has been present the longest is evicted. There is no concept of + "recently used" or "frequently used" — age alone determines eviction order. + Conceptually it behaves like a queue: new items join the back and evictions + come from the front. + + This implementation backs that queue with a double-ended queue for O(1) + front removal, paired with a hash map for O(1) key lookups. Logical indices + (a monotonically increasing counter) are stored in the table rather than + physical deque positions, so eviction never requires rewriting the index. + A ``front_offset`` counter recovers physical positions at read time as + ``entries[table[key] - front_offset]``. + + Pros: + - Insert, lookup, and evict are all O(1) amortized. + - Eviction order is fully deterministic and easy to reason about. + - No per-read overhead: unlike LRU, FIFO requires no bookkeeping on + cache hits. + + Cons: + - Access-blind eviction: a hot item is evicted just as readily as one + never read, hurting hit rates on workloads with temporal locality. + - Logical-index indirection adds internal complexity vs. a naive queue. 
+ - A rare O(n) index rebase (when ``front_offset`` nears + ``usize::MAX - isize::MAX``) introduces an occasional latency spike. + + Use ``FIFOCache`` when eviction order must be predictable and auditable, + access patterns are roughly uniform, or read overhead must be minimal + (insert-heavy workloads with infrequent re-reads). + + Avoid it when the workload has strong temporal locality; in those cases LRU + or LFU will deliver meaningfully better hit rates. """ def insert(self, key: KT, value: VT) -> typing.Optional[VT]: """ - Equals to `self[key] = value`, but returns a value: + Inserts a key-value pair and returns the previous value if present. - - If the cache did not have this key present, None is returned. - - If the cache did have this key present, the value is updated, - and the old value is returned. The key is not updated, though; + Equivalent to ``self[key] = value``, but returns a value. Prefer this + method over direct assignment to keep code compatible across different + cache policies. + + Args: + key: The key to insert or update. + value: The value to associate with ``key``. - It's recommended to use this method instead of `self[key] = value`, as it keeps code - compatible across different cache policies. + Returns: + ``None`` if the key was not previously present; the old value if + the key already existed (the key itself is not updated). """ ... def update(self, iterable: _IterableType[KT, VT]) -> None: """ - Updates the cache with elements from a dictionary or an iterable object of key/value pairs. + Updates the cache with elements from a dictionary or iterable of key-value pairs. + + Args: + iterable: A dictionary, object supporting ``items()``, another + cache instance, or an iterable of ``(key, value)`` tuples. """ ... @@ -346,126 +449,154 @@ class FIFOCache(BaseCacheImpl[KT, VT]): default: typing.Optional[DT] = None, ) -> typing.Optional[VT | DT]: """ - Inserts key with a value of default if key is not in the cache. 
+ Inserts ``key`` with ``default`` as its value if the key is absent. - Returns the value for key if key is in the cache, else default. + Args: + key: The key to look up or insert. + default: The value to insert if ``key`` is not in the cache. + Defaults to ``None``. + + Returns: + The existing value if ``key`` is present, otherwise ``default``. """ ... def popitem(self) -> typing.Tuple[KT, VT]: """ - Removes the element that has been in the cache the longest. + Removes and returns the oldest item in the cache. + + Returns: + A ``(key, value)`` tuple for the item that was inserted first. + + Raises: + KeyError: If the cache is empty. """ ... def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: """ - Returns an iterable object of the cache's items (key-value pairs). + Returns an ordered iterable of the cache's ``(key, value)`` pairs. + + Warning: + Do not modify the cache while iterating. - Notes: - - You should not make any changes in cache while using this iterable object. - - Items are ordered. + Returns: + An iterable of ``(key, value)`` tuples in insertion order. """ ... def keys(self) -> typing.Iterable[KT]: """ - Returns an iterable object of the cache's keys. + Returns an ordered iterable of the cache's keys. + + Warning: + Do not modify the cache while iterating. - Notes: - - You should not make any changes in cache while using this iterable object. - - Keys are ordered. + Returns: + An iterable of keys in insertion order. """ ... def values(self) -> typing.Iterable[VT]: """ - Returns an iterable object of the cache's values. + Returns an ordered iterable of the cache's values. - Notes: - - You should not make any changes in cache while using this iterable object. - - Values are ordered. + Warning: + Do not modify the cache while iterating. + + Returns: + An iterable of values in insertion order. """ ... 
def first(self, n: int = 0) -> typing.Optional[KT]: """ - Returns the first key in cache; this is the one which will be removed by `popitem()` (if n == 0). - By using `n` parameter, you can browse order index by index. + Returns the key at position ``n`` in insertion order. + + The key at position 0 is the one that will be removed by ``popitem()``. - Raises `IndexError` if cache is empty, or `n` is out of range. + Args: + n: The index to look up. Defaults to 0 (the oldest item). + + Returns: + The key at the given index. + + Raises: + IndexError: If the cache is empty or ``n`` is out of range. """ ... def last(self) -> typing.Optional[KT]: """ - Returns the last key in cache. Equals to `self.first(-1)`. + Returns the most recently inserted key. Equivalent to ``self.first(-1)``. - Raises `IndexError` if cache is empty. + Returns: + The key of the most recently inserted item. + + Raises: + IndexError: If the cache is empty. """ ... class RRCache(BaseCacheImpl[KT, VT]): - """ - A thread-safe, memory-efficient key-value cache with Random Replacement eviction policy. - When the cache reaches its maximum size, an item is randomly selected and + """A thread-safe, memory-efficient cache with a Random Replacement eviction policy. + + When the cache reaches its maximum size, a randomly selected item is evicted to make room for new entries. - ## How It Works - `RRCache` is a configurable hashmap-like store with automatic eviction. When an item is inserted: - - It is stored directly without any ordering or priority tracking. - - If a maximum size is configured and the cache is full, a random entry is evicted to make room - for the new item. - - All read and write operations are thread-safe, making it safe for concurrent access without - external locking. - - The Random Replacement policy selects entries for eviction uniformly at random, ensuring fair - treatment across all cached items regardless of access patterns. 
- - ### Pros - - Low overhead: Random Replacement is computationally cheap compared to tracking access order or frequency. - - Thread-safe: safe for concurrent reads and writes out of the box. - - Configurable capacity: a hard size limit prevents unbounded memory growth while allowing new entries - through automatic eviction. - - No staleness issues: items persist only as long as they remain unselected by the eviction policy, - preventing indefinite accumulation of stale data. - - ### Cons - - Non-deterministic eviction: random selection means you cannot predict which entry will be removed, - potentially evicting recently cached or frequently accessed items. - - Unordered: insertion order is not preserved. - - Less optimal than LRU/LFU: for workloads with skewed access patterns, Random Replacement will - evict frequently used items more often than policy-aware caches. - - ## When to Use It - `RRCache` is the right choice when: - - You have a working set that can grow unpredictably and requires automatic memory management. - - Access patterns are relatively uniform and predictable, so random eviction is not significantly - worse than smarter policies. - - You need low computational overhead and simple eviction logic. - - You want to prevent unbounded memory growth without the complexity of tracking usage metadata. - - Avoid it when you have highly skewed access patterns (where certain items are accessed far more - frequently than others), when cache hits are mission-critical and predictability matters, or when - you need fine-grained control over what gets evicted. + Items are stored without any ordering or priority tracking. The Random + Replacement policy selects entries for eviction uniformly at random, + ensuring fair treatment across all cached items regardless of access + patterns. + + Pros: + - Low overhead: computationally cheap compared to tracking access order + or frequency. + - Thread-safe: safe for concurrent reads and writes out of the box. 
+ - Configurable capacity: a hard size limit prevents unbounded memory + growth while allowing new entries through automatic eviction. + - No indefinite staleness: items are eventually replaced by the + eviction policy. + + Cons: + - Non-deterministic eviction: random selection means recently cached or + frequently accessed items may be unexpectedly removed. + - Unordered: insertion order is not preserved. + - Less optimal than LRU/LFU on skewed access patterns. + + Use ``RRCache`` when the working set can grow unpredictably, access + patterns are roughly uniform, and low overhead with simple eviction logic + is preferred. + + Avoid it when access patterns are highly skewed, cache hits are + mission-critical, or fine-grained eviction control is required. """ def insert(self, key: KT, value: VT) -> typing.Optional[VT]: """ - Equals to `self[key] = value`, but returns a value: + Inserts a key-value pair and returns the previous value if present. + + Equivalent to ``self[key] = value``, but returns a value. Prefer this + method over direct assignment to keep code compatible across different + cache policies. - - If the cache did not have this key present, None is returned. - - If the cache did have this key present, the value is updated, - and the old value is returned. The key is not updated, though; + Args: + key: The key to insert or update. + value: The value to associate with ``key``. - It's recommended to use this method instead of `self[key] = value`, as it keeps code - compatible across different cache policies. + Returns: + ``None`` if the key was not previously present; the old value if + the key already existed (the key itself is not updated). """ ... def update(self, iterable: _IterableType[KT, VT]) -> None: """ - Updates the cache with elements from a dictionary or an iterable object of key/value pairs. + Updates the cache with elements from a dictionary or iterable of key-value pairs. 
+ + Args: + iterable: A dictionary, object supporting ``items()``, another + cache instance, or an iterable of ``(key, value)`` tuples. """ ... @@ -477,8 +608,12 @@ class RRCache(BaseCacheImpl[KT, VT]): """ Retrieves the value for a given key from the cache. - Returns the value associated with the key if present, otherwise returns the specified default value. - Equivalent to `self[key]`, but provides a fallback default if the key is not found. + Args: + key: The key to look up. + default: Value to return if the key is not found. + + Returns: + The value associated with ``key``, or ``default`` if not found. """ ... @@ -488,141 +623,143 @@ class RRCache(BaseCacheImpl[KT, VT]): default: typing.Optional[DT] = None, ) -> typing.Optional[VT | DT]: """ - Inserts key with a value of default if key is not in the cache. + Inserts ``key`` with ``default`` as its value if the key is absent. - Returns the value for key if key is in the cache, else default. + Args: + key: The key to look up or insert. + default: The value to insert if ``key`` is not in the cache. + Defaults to ``None``. + + Returns: + The existing value if ``key`` is present, otherwise ``default``. """ ... def popitem(self) -> typing.Tuple[KT, VT]: - """Randomly selects and removes a (key, value) pair from the cache.""" + """ + Randomly selects, removes, and returns a ``(key, value)`` pair. + + Returns: + A randomly chosen ``(key, value)`` tuple. + + Raises: + KeyError: If the cache is empty. + """ ... def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: """ - Returns an iterable object of the cache's items (key-value pairs). + Returns an iterable of the cache's ``(key, value)`` pairs. - Notes: - - You should not make any changes in cache while using this iterable object. - - Items are not ordered. + Warning: + Do not modify the cache while iterating. Items are not ordered. + + Returns: + An iterable of ``(key, value)`` tuples. """ ... 
def keys(self) -> typing.Iterable[KT]: """ - Returns an iterable object of the cache's keys. + Returns an iterable of the cache's keys. + + Warning: + Do not modify the cache while iterating. Keys are not ordered. - Notes: - - You should not make any changes in cache while using this iterable object. - - Keys are not ordered. + Returns: + An iterable of keys. """ ... def values(self) -> typing.Iterable[VT]: """ - Returns an iterable object of the cache's values. + Returns an iterable of the cache's values. - Notes: - - You should not make any changes in cache while using this iterable object. - - Values are not ordered. + Warning: + Do not modify the cache while iterating. Values are not ordered. + + Returns: + An iterable of values. """ ... def random_key(self) -> KT: """ Randomly selects and returns a key from the cache. - Raises `KeyError` If the cache is empty. + + Returns: + A randomly chosen key. + + Raises: + KeyError: If the cache is empty. """ ... class LRUCache(BaseCacheImpl[KT, VT]): """ - A Least-Recently-Used (LRU) cache eviction policy: when the cache is full, - the item that has not been accessed for the longest time is removed first, - regardless of how many times it was accessed in the past. - - ## How It Works - The LRU algorithm is one of the most widely used cache eviction strategies in - practice. Items are tracked by their access recency—every time an item is read - or written, it becomes the most recently used. When the cache reaches capacity, - the least recently used item (the one that was accessed longest ago) is - evicted to make room for new entries. - - This implementation pairs a doubly-linked list with a hash map. The linked list - maintains items in access order: the most recently used item sits at the back, - and the least recently used at the front. The hash map stores pointers (cursors) - into this list, enabling O(1) key lookups. 
On every access—read or write—the - accessed item is moved to the back of the list, promoting it to "most recently used" - status. When eviction is needed, the front item is removed. - - The doubly-linked list structure is critical: it permits O(1) removal and - reinsertion of any item anywhere in the ordering, without requiring a full rebuild - or index shifting. A running total tracks the current size of cached items, - allowing capacity checks in constant time. - - ### Pros - - **Excellent hit rates on temporal locality.** Workloads where recently or - frequently accessed items are likely to be needed again soon benefit dramatically - from LRU's recency-aware eviction. Real-world caches (CPU L1/L2, database - buffers, CDN edges) rely on this principle. - - **Insert, lookup, and evict are all O(1) amortized.** The doubly-linked list - and hash map combination guarantees no per-operation index shifting or traversals. - - **Automatic adaptation to access patterns.** Hot keys naturally migrate to the - back of the list and stay there, while cold keys drift toward eviction. No - manual tuning of weights or thresholds is needed. - - **Per-hit cost is minimal.** While LRU does require bookkeeping on reads (moving - an item to the back), this bookkeeping is O(1) and adds negligible overhead to most - workloads. - - ### Cons - - **Per-read overhead.** Every cache hit requires updating the linked list (removing - the item from its current position and reinserting it at the back), which is - measurably slower than FIFO's zero-cost hits on read-heavy workloads. - - **Burst traffic can skew eviction.** A single item accessed many times in rapid - succession will be kept alive indefinitely, even if other keys have better long-term - utility. Recency is a proxy for future use, not a guarantee. - - **Implementation complexity.** The doubly-linked list and cursor-based hash table add - internal complexity compared to simpler policies like FIFO. 
- - **Memory overhead.** Storing doubly-linked pointers (prev/next) for every cached item - consumes extra memory compared to array-based alternatives. - - ## When to use it - Reach for `LRUPolicy` when: - - Your workload exhibits temporal locality—recently accessed items are likely to be - needed again soon. Databases, web caches, and CPU caches all exhibit this pattern. - - Hit rate is your primary metric. If maximizing the proportion of requests served - from the cache matters more than minimizing per-hit latency, LRU is typically the - best general-purpose choice. - - Access patterns are unknown or unpredictable. LRU's automatic adaptation makes it a safe - default when you cannot statically analyze what keys will be hot. - - You need a standard, battle-tested algorithm. LRU is the de facto eviction policy in most - production systems; it is well-understood, widely supported, and easy to reason about. - - Avoid it when: - - Your workload is write-heavy with few or no re-reads. FIFO's zero per-hit bookkeeping - will outperform LRU if the cache is rarely hit. - - You need sub-microsecond latency on every operation. The linked-list manipulation on each - read can add measurable overhead in ultra-low-latency systems. - - Access patterns are bimodal or exhibit frequency-heavy behavior (a small set of items is - accessed far more often than others). An LFU policy may deliver better hit rates in such cases. + A cache with a Least-Recently-Used (LRU) eviction policy. + + When the cache is full, the item that has not been accessed for the longest + time is removed first, regardless of how many times it was accessed in the + past. + + Items are tracked by access recency — every read or write promotes an item + to "most recently used". When capacity is reached, the least recently used + item (accessed longest ago) is evicted. + + This implementation pairs a doubly-linked list with a hash map. 
The list + maintains items in access order (most recently used at the back, least + recently used at the front); the hash map stores cursors into the list for + O(1) lookups. On every access the item is moved to the back. On eviction + the front item is removed. A running total enables O(1) capacity checks. + + Pros: + - Excellent hit rates on temporal-locality workloads. + - Insert, lookup, and evict are all O(1) amortized. + - Automatically adapts to access patterns without manual tuning. + - Per-hit cost is minimal (O(1) linked-list manipulation). + + Cons: + - Per-read overhead from updating the linked list on every cache hit. + - Burst traffic can keep a transiently hot item alive at the expense of + items with better long-term utility. + - Implementation complexity from doubly-linked list and cursor-based + hash table. + - Memory overhead from storing prev/next pointers for every entry. + + Use ``LRUCache`` when the workload exhibits temporal locality, hit rate is + the primary metric, or access patterns are unknown or unpredictable. + + Avoid it for write-heavy workloads with few re-reads, ultra-low-latency + requirements, or frequency-heavy bimodal access patterns (consider LFU + instead). """ def insert(self, key: KT, value: VT) -> typing.Optional[VT]: """ - Equals to `self[key] = value`, but returns a value: + Inserts a key-value pair and returns the previous value if present. - - If the cache did not have this key present, None is returned. - - If the cache did have this key present, the value is updated, - and the old value is returned. The key is not updated, though; + Equivalent to ``self[key] = value``, but returns a value. Prefer this + method over direct assignment to keep code compatible across different + cache policies. - It's recommended to use this method instead of `self[key] = value`, as it keeps code - compatible across different cache policies. + Args: + key: The key to insert or update. + value: The value to associate with ``key``. 
+ + Returns: + ``None`` if the key was not previously present; the old value if + the key already existed (the key itself is not updated). """ ... def update(self, iterable: _IterableType[KT, VT]) -> None: """ - Updates the cache with elements from a dictionary or an iterable object of key/value pairs. + Updates the cache with elements from a dictionary or iterable of key-value pairs. + + Args: + iterable: A dictionary, object supporting ``items()``, another + cache instance, or an iterable of ``(key, value)`` tuples. """ ... @@ -634,8 +771,12 @@ class LRUCache(BaseCacheImpl[KT, VT]): """ Retrieves the value for a given key from the cache. - Returns the value associated with the key if present, otherwise returns the specified default value. - Equivalent to `self[key]`, but provides a fallback default if the key is not found. + Args: + key: The key to look up. + default: Value to return if the key is not found. + + Returns: + The value associated with ``key``, or ``default`` if not found. """ ... @@ -645,46 +786,63 @@ class LRUCache(BaseCacheImpl[KT, VT]): default: typing.Optional[DT] = None, ) -> typing.Optional[VT | DT]: """ - Inserts key with a value of default if key is not in the cache. + Inserts ``key`` with ``default`` as its value if the key is absent. - Returns the value for key if key is in the cache, else default. + Args: + key: The key to look up or insert. + default: The value to insert if ``key`` is not in the cache. + Defaults to ``None``. + + Returns: + The existing value if ``key`` is present, otherwise ``default``. """ ... def popitem(self) -> typing.Tuple[KT, VT]: """ - Removes the least recently used item from the cache and returns it as a (key, value) tuple. - Raises KeyError if the cache is empty. + Removes and returns the least recently used item. + + Returns: + A ``(key, value)`` tuple for the least recently used item. + + Raises: + KeyError: If the cache is empty. """ ... 
def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: """ - Returns an iterable object of the cache's items (key-value pairs). + Returns an ordered iterable of the cache's ``(key, value)`` pairs. + + Warning: + Do not modify the cache while iterating. - Notes: - - You should not make any changes in cache while using this iterable object. - - Items are ordered. + Returns: + An iterable of ``(key, value)`` tuples in access order. """ ... def keys(self) -> typing.Iterable[KT]: """ - Returns an iterable object of the cache's keys. + Returns an ordered iterable of the cache's keys. - Notes: - - You should not make any changes in cache while using this iterable object. - - Keys are ordered. + Warning: + Do not modify the cache while iterating. + + Returns: + An iterable of keys in access order. """ ... def values(self) -> typing.Iterable[VT]: """ - Returns an iterable object of the cache's values. + Returns an ordered iterable of the cache's values. + + Warning: + Do not modify the cache while iterating. - Notes: - - You should not make any changes in cache while using this iterable object. - - Values are ordered. + Returns: + An iterable of values in access order. """ ... @@ -694,103 +852,110 @@ class LRUCache(BaseCacheImpl[KT, VT]): default: typing.Optional[DT] = ..., ) -> typing.Union[VT, DT]: """ - Retrieves the value for a given key from the cache (without promoting the key). + Retrieves the value for a key without updating its recency. + + Args: + key: The key to look up. + default: Value to return if the key is not found. + + Returns: + The value associated with ``key``, or ``default`` if not found. """ + ... def least_recently_used(self) -> typing.Optional[KT]: """ - Returns the key in the cache that has not been accessed in the longest time. + Returns the key that has not been accessed for the longest time. + + Returns: + The least recently used key. - Raises `KeyError` if cache is empty. + Raises: + KeyError: If the cache is empty. """ ... 
def most_recently_used(self) -> typing.Optional[KT]: """ - Returns the key in the cache that has been accessed in the shortest time. + Returns the key that was accessed most recently. + + Returns: + The most recently used key. - Raises `KeyError` if cache is empty. + Raises: + KeyError: If the cache is empty. """ ... class LFUCache(BaseCacheImpl[KT, VT]): """ - A Least-Frequently-Used (LFU) cache eviction policy: when the cache is full, the item - with the lowest access count is evicted first. Ties in frequency are broken by recency - - among equally rare items, the oldest is evicted. - - ## How It Works - The LFU algorithm tracks how many times each cached item has been accessed, and always - evicts the item with the smallest count. This makes it well-suited for workloads where - some items are structurally "hot" and where that frequency signal is stable enough to - be worth preserving across cache pressure events. - - This implementation uses a `lazy binary min-heap` keyed on access frequency, paired with - a `hash map` that maps each key to its cursor (a stable pointer into the heap's backing - buffer). The heap is "lazy" in the sense that it does not restore the heap invariant after - every frequency increment; instead it sets a dirty flag and defers the full re-sort until - the next eviction. This amortises the cost of heap maintenance across many hits, so - read-heavy workloads pay far less per operation than a classic eager heap would require. - - On a cache hit, the item's frequency counter is incremented in O(1) and the heap is marked - dirty. On eviction, the heap is sorted if dirty, and the minimum-frequency item is popped - in O(n log n) worst-case (amortised O(log n) under typical access distributions). Lookups - are O(1) via the hash map. - - ### Pros - - Frequency-aware eviction. Items that are accessed often are protected from eviction even - under heavy cache pressure, leading to higher hit rates on skewed workloads. - - O(1) cache hits. 
Incrementing a counter and marking the heap dirty is constant-time work, - with no structural reorganisation on the hot path. - - Lazy heap sorting amortises O(n log n) sort cost across many inserts and hits, keeping - the average cost per operation much lower than a naive eager implementation. - - ### Cons - - Eviction is O(n log n) worst-case. If the heap is maximally dirty (every entry modified - since last sort), a single eviction triggers a full re-sort over all entries. This is - amortised away in practice but introduces latency spikes under adversarial access patterns. - - Frequency counters accumulate indefinitely. A key that was hot during an early burst remains - privileged long after traffic shifts, causing "cache pollution" - stale items that monopolise - capacity because of historical frequency, not current utility. - - Access patterns must be skewed for LFU to outperform simpler policies. On uniform workloads, - frequency counters provide no signal and the extra bookkeeping is pure overhead. - - ## When to use it - Reach for `LFUPolicy` when: - - Your workload has a stable hot set: a minority of keys that are accessed disproportionately - often and whose relative popularity changes slowly over time. - - Cache pollution from one-time scans is a concern: LFU naturally resists large sequential reads - from displacing frequently accessed items, because freshly inserted keys start at count 1 and - are evicted before any item with accumulated hits. - - Hit rate matters more than worst-case eviction latency: the amortised cost is low, but if your - system has hard real-time latency requirements, the occasional sort spike may be unacceptable. - - Avoid it when access patterns shift rapidly. If the "hot" subset of keys changes frequently, - frequency counters become stale signals and LFU will evict items that have recently become - popular. 
In those cases, an LRU policy - which tracks recency rather than frequency - will - adapt faster and typically deliver better hit rates. - - Avoid it on uniform workloads where all keys are accessed with roughly equal probability. - The frequency signal provides no meaningful discrimination, and the overhead of maintaining - counters and a heap is wasted compared to the simpler bookkeeping of FIFO or LRU. + A cache with a Least-Frequently-Used (LFU) eviction policy. + + When the cache is full, the item with the lowest access count is evicted + first. Ties in frequency are broken by recency — among equally rare items, + the oldest is evicted. + + Access counts are tracked per key. This implementation uses a lazy binary + min-heap keyed on access frequency, paired with a hash map that maps each + key to its cursor (a stable pointer into the heap's backing buffer). The + heap is "lazy": it does not restore the heap invariant after every frequency + increment; instead it sets a dirty flag and defers re-sorting until the + next eviction, amortising heap-maintenance cost across many hits. + + On a cache hit the frequency counter is incremented in O(1) and the heap is + marked dirty. On eviction the heap is sorted if dirty, then the + minimum-frequency item is popped in O(n log n) worst-case (amortised + O(log n) under typical distributions). Lookups are O(1) via the hash map. + + Pros: + - Frequency-aware eviction protects hot items under heavy cache + pressure. + - O(1) cache hits: incrementing a counter and marking the heap dirty + is constant-time work with no structural reorganisation. + - Lazy heap sorting amortises the O(n log n) sort cost across many + inserts and hits. + + Cons: + - Eviction is O(n log n) worst-case, introducing latency spikes under + adversarial access patterns. + - Frequency counters accumulate indefinitely, causing "cache pollution" + where historically hot but currently cold items monopolise capacity. 
+ - Access patterns must be skewed for LFU to outperform simpler + policies; on uniform workloads the extra bookkeeping is pure overhead. + + Use ``LFUCache`` when the workload has a stable hot set, cache pollution + from one-time scans is a concern, or hit rate matters more than worst-case + eviction latency. + + Avoid it when access patterns shift rapidly (use LRU instead) or when all + keys are accessed with roughly equal probability. """ def insert(self, key: KT, value: VT) -> typing.Optional[VT]: """ - Equals to `self[key] = value`, but returns a value: + Inserts a key-value pair and returns the previous value if present. - - If the cache did not have this key present, None is returned. - - If the cache did have this key present, the value is updated, - and the old value is returned. The key is not updated, though; + Equivalent to ``self[key] = value``, but returns a value. Prefer this + method over direct assignment to keep code compatible across different + cache policies. + + Args: + key: The key to insert or update. + value: The value to associate with ``key``. - It's recommended to use this method instead of `self[key] = value`, as it keeps code - compatible across different cache policies. + Returns: + ``None`` if the key was not previously present; the old value if + the key already existed (the key itself is not updated). """ ... def update(self, iterable: _IterableType[KT, VT]) -> None: """ - Updates the cache with elements from a dictionary or an iterable object of key/value pairs. + Updates the cache with elements from a dictionary or iterable of key-value pairs. + + Args: + iterable: A dictionary, object supporting ``items()``, another + cache instance, or an iterable of ``(key, value)`` tuples. """ ... @@ -802,8 +967,12 @@ class LFUCache(BaseCacheImpl[KT, VT]): """ Retrieves the value for a given key from the cache. - Returns the value associated with the key if present, otherwise returns the specified default value. 
- Equivalent to `self[key]`, but provides a fallback default if the key is not found. + Args: + key: The key to look up. + default: Value to return if the key is not found. + + Returns: + The value associated with ``key``, or ``default`` if not found. """ ... @@ -813,46 +982,63 @@ class LFUCache(BaseCacheImpl[KT, VT]): default: typing.Optional[DT] = None, ) -> typing.Optional[VT | DT]: """ - Inserts key with a value of default if key is not in the cache. + Inserts ``key`` with ``default`` as its value if the key is absent. + + Args: + key: The key to look up or insert. + default: The value to insert if ``key`` is not in the cache. + Defaults to ``None``. - Returns the value for key if key is in the cache, else default. + Returns: + The existing value if ``key`` is present, otherwise ``default``. """ ... def popitem(self) -> typing.Tuple[KT, VT]: """ - Removes the least recently used item from the cache and returns it as a (key, value) tuple. - Raises `KeyError` if the cache is empty. + Removes and returns the least frequently used item. + + Returns: + A ``(key, value)`` tuple for the item with the lowest access count. + + Raises: + KeyError: If the cache is empty. """ ... def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: """ - Returns an iterable object of the cache's items (key-value pairs). + Returns an ordered iterable of the cache's ``(key, value)`` pairs. - Notes: - - You should not make any changes in cache while using this iterable object. - - Items are ordered. + Warning: + Do not modify the cache while iterating. + + Returns: + An iterable of ``(key, value)`` tuples in frequency order. """ ... def keys(self) -> typing.Iterable[KT]: """ - Returns an iterable object of the cache's keys. + Returns an ordered iterable of the cache's keys. + + Warning: + Do not modify the cache while iterating. - Notes: - - You should not make any changes in cache while using this iterable object. - - Keys are ordered. 
+ Returns: + An iterable of keys in frequency order. """ ... def values(self) -> typing.Iterable[VT]: """ - Returns an iterable object of the cache's values. + Returns an ordered iterable of the cache's values. - Notes: - - You should not make any changes in cache while using this iterable object. - - Values are ordered. + Warning: + Do not modify the cache while iterating. + + Returns: + An iterable of values in frequency order. """ ... @@ -862,23 +1048,45 @@ class LFUCache(BaseCacheImpl[KT, VT]): default: typing.Optional[DT] = ..., ) -> typing.Union[VT, DT]: """ - Retrieves the value for a given key from the cache (without frequency increment). + Retrieves the value for a key without incrementing its frequency counter. + + Args: + key: The key to look up. + default: Value to return if the key is not found. + + Returns: + The value associated with ``key``, or ``default`` if not found. """ ... def least_frequently_used(self, n: int = 0) -> KT: """ - Returns the key in the cache that has been accessed the least. If n is given, returns the nth least frequently used key. + Returns the key with the lowest access count. + + Args: + n: If given, returns the ``n``-th least frequently used key + (0-indexed). Defaults to 0. + + Returns: + The key with the ``n``-th lowest access count. - Raises `IndexError` if cache is empty, or `n` is out of range. + Raises: + IndexError: If the cache is empty or ``n`` is out of range. - Notes: - - This method may re-sort the cache which can cause iterators to be stopped. - - Do not use this method while using iterators. + Warning: + This method may re-sort the cache. Do not call it while iterating + over the cache. """ ... class TTLCache(BaseCacheImpl[KT, VT]): + """ + A cache with time-to-live (TTL) expiration. + + Items expire automatically after a configurable duration. Eviction follows + a FIFO order among non-expired items when the cache is full. 
+ """ + def __init__( self, maxsize: int, @@ -887,28 +1095,53 @@ class TTLCache(BaseCacheImpl[KT, VT]): *, capacity: int = 0, getsizeof: typing.Callable[[KT, VT]] | None = None, - ) -> None: ... + ) -> None: + """ + Initializes a new TTLCache instance. + + Args: + maxsize: Maximum number of elements the cache can hold. If zero, + the limit is set to ``sys.maxsize`` internally. + global_ttl: Default time-to-live for all entries, in seconds or as + a ``timedelta``. + iterable: Initial data to populate the cache. + capacity: Pre-allocate cache capacity to minimize reallocations. + Defaults to 0. + getsizeof: A callable that computes the size of a key-value pair. + When ``None``, each entry is assumed to have a size of 1. + """ + ... + @property def global_ttl(self) -> float: - """Returns the specified `global_ttl`""" + """The configured ``global_ttl`` in seconds.""" ... def insert(self, key: KT, value: VT) -> typing.Optional[VT]: """ - Equals to `self[key] = value`, but returns a value: + Inserts a key-value pair and returns the previous value if present. + + Equivalent to ``self[key] = value``, but returns a value. Prefer this + method over direct assignment to keep code compatible across different + cache policies. - - If the cache did not have this key present, None is returned. - - If the cache did have this key present, the value is updated, - and the old value is returned. The key is not updated, though; + Args: + key: The key to insert or update. + value: The value to associate with ``key``. - It's recommended to use this method instead of `self[key] = value`, as it keeps code - compatible across different cache policies. + Returns: + ``None`` if the key was not previously present; the old value if + the key already existed (the key itself is not updated). """ ... def update(self, iterable: _IterableType[KT, VT]) -> None: """ - Updates the cache with elements from a dictionary or an iterable object of key/value pairs. 
+ Updates the cache with elements from a dictionary or iterable of key-value pairs. + + Args: + iterable: A dictionary, object supporting ``items()``, another + cache instance, or an iterable of ``(key, value)`` tuples. """ ... @@ -918,70 +1151,102 @@ class TTLCache(BaseCacheImpl[KT, VT]): default: typing.Optional[DT] = None, ) -> typing.Optional[VT | DT]: """ - Inserts key with a value of default if key is not in the cache. + Inserts ``key`` with ``default`` as its value if the key is absent. + + Args: + key: The key to look up or insert. + default: The value to insert if ``key`` is not in the cache. + Defaults to ``None``. - Returns the value for key if key is in the cache, else default. + Returns: + The existing value if ``key`` is present, otherwise ``default``. """ ... def popitem(self) -> typing.Tuple[KT, VT]: """ - Removes the element that has been in the cache the longest. + Removes and returns the item that has been in the cache the longest. + + Returns: + A ``(key, value)`` tuple for the oldest item. + + Raises: + KeyError: If the cache is empty. """ ... def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: """ - Returns an iterable object of the cache's items (key-value pairs). + Returns an ordered iterable of the cache's ``(key, value)`` pairs. + + Warning: + Do not modify the cache while iterating. - Notes: - - You should not make any changes in cache while using this iterable object. - - Items are ordered. + Returns: + An iterable of ``(key, value)`` tuples in insertion order. """ ... def keys(self) -> typing.Iterable[KT]: """ - Returns an iterable object of the cache's keys. + Returns an ordered iterable of the cache's keys. - Notes: - - You should not make any changes in cache while using this iterable object. - - Keys are ordered. + Warning: + Do not modify the cache while iterating. + + Returns: + An iterable of keys in insertion order. """ ... def values(self) -> typing.Iterable[VT]: """ - Returns an iterable object of the cache's values. 
+ Returns an ordered iterable of the cache's values. + + Warning: + Do not modify the cache while iterating. - Notes: - - You should not make any changes in cache while using this iterable object. - - Values are ordered. + Returns: + An iterable of values in insertion order. """ ... def first(self, n: int = 0) -> typing.Optional[KT]: """ - Returns the first key in cache; this is the one which will be removed by `popitem()` (if n == 0). - By using `n` parameter, you can browse order index by index. + Returns the key at position ``n`` in insertion order. - Raises `IndexError` if cache is empty, or `n` is out of range. + The key at position 0 is the one that will be removed by ``popitem()``. + + Args: + n: The index to look up. Defaults to 0 (the oldest item). + + Returns: + The key at the given index. + + Raises: + IndexError: If the cache is empty or ``n`` is out of range. """ ... def last(self) -> typing.Optional[KT]: """ - Returns the last key in cache. Equals to `self.first(-1)`. + Returns the most recently inserted key. Equivalent to ``self.first(-1)``. + + Returns: + The key of the most recently inserted item. - Raises `IndexError` if cache is empty. + Raises: + IndexError: If the cache is empty. """ ... def expire(self, *, reuse: bool = False) -> None: """ - Manually removes expired key-value pairs from cache. + Manually removes all expired key-value pairs from the cache. - If `reuse` is True, will not free the memory for reusing in the future. + Args: + reuse: If ``True``, retains the allocated memory for future reuse + rather than freeing it. Defaults to ``False``. """ ... @@ -991,7 +1256,16 @@ class TTLCache(BaseCacheImpl[KT, VT]): default: typing.Optional[DT] = None, ) -> typing.Tuple[typing.Union[VT, DT], float]: """ - Works exactly like `.get()`, but also returns expiration duration for a given key from the cache (or 0.0 if not found). + Retrieves a value along with its remaining TTL. + + Args: + key: The key to look up. 
+ default: Value to return if the key is not found. + + Returns: + A tuple of ``(value, remaining_ttl)`` where ``remaining_ttl`` is + the expiration duration in seconds, or ``0.0`` if the key was not + found. """ ... @@ -1001,22 +1275,41 @@ class TTLCache(BaseCacheImpl[KT, VT]): default: typing.Optional[DT] = None, ) -> typing.Tuple[typing.Union[VT, DT], float]: """ - Works exactly like `.pop()`, but also returns expiration duration for a given key from the cache (or 0.0 if not found). + Removes a key and returns its value along with its remaining TTL. + + Args: + key: The key to remove. + default: Value to return if the key is not found. + + Returns: + A tuple of ``(value, remaining_ttl)`` where ``remaining_ttl`` is + the expiration duration in seconds, or ``0.0`` if the key was not + found. """ ... def popitem_with_expire(self) -> typing.Tuple[VT, DT, float]: """ - Works exactly like `.popitem()`, but also returns expiration duration for a given key from the cache. + Removes and returns the oldest item along with its remaining TTL. + + Returns: + A tuple of ``(key, value, remaining_ttl)`` where ``remaining_ttl`` + is the expiration duration in seconds. + + Raises: + KeyError: If the cache is empty. """ ... def items_with_expire(self) -> typing.Iterable[typing.Tuple[KT, VT, float]]: """ - Returns an iterable object of the cache's items (key-value pairs) with their expiration duration. + Returns an ordered iterable of items with their remaining TTL. + + Warning: + Do not modify the cache while iterating. - Notes: - - You should not make any changes in cache while using this iterable object. - - Items are ordered. + Returns: + An iterable of ``(key, value, remaining_ttl)`` tuples in insertion + order, where ``remaining_ttl`` is in seconds. """ ... 
diff --git a/src/policies/lfupolicy.rs b/src/policies/lfupolicy.rs index 1f05f74..ba415a8 100644 --- a/src/policies/lfupolicy.rs +++ b/src/policies/lfupolicy.rs @@ -29,14 +29,14 @@ pub struct FrequencyHandle { } impl FrequencyHandle { - /// Creates a new [`FrequencyHandle`] + /// Creates a new [`FrequencyHandle`] with an initial frequency (always zero, except + /// when loading pickled state). #[inline] pub fn new( py: pyo3::Python<'_>, getsizeof: &utils::GetsizeofFunction, key: alias::PyObject, value: alias::PyObject, - // initial frequency frequency: u128, ) -> pyo3::PyResult { Self::with_precomputed_hash_key( @@ -48,14 +48,14 @@ impl FrequencyHandle { ) } - /// Creates a new [`FrequencyHandle`] from an already-hashed key. + /// Creates a new [`FrequencyHandle`] from an already-hashed key, + /// with an initial frequency (always zero, except when loading pickled state). #[inline] pub fn with_precomputed_hash_key( py: pyo3::Python<'_>, getsizeof: &utils::GetsizeofFunction, key: utils::PrecomputedHashObject, value: alias::PyObject, - // initial frequency frequency: u128, ) -> pyo3::PyResult { let size = getsizeof.call(py, key.as_ref(), &value)?; diff --git a/src/policies/traits.rs b/src/policies/traits.rs index 94cdae4..0f8a1e2 100644 --- a/src/policies/traits.rs +++ b/src/policies/traits.rs @@ -96,7 +96,6 @@ pub trait PolicyExt { /// Looks up a handle by `hash` and `eq`, applying policy side-effects on hit. /// /// # Errors - /// /// Returns `Err` if `eq` raises a Python exception. fn get( &mut self, @@ -107,7 +106,6 @@ pub trait PolicyExt { /// Returns a [`PolicyEntry`] for the slot at `hash` / `eq`. /// /// # Errors - /// /// Returns `Err` if `eq` raises a Python exception.
fn entry<'a>( &'a mut self, diff --git a/src/pyclasses/cache.rs b/src/pyclasses/cache.rs index e80caf4..30b99f3 100644 --- a/src/pyclasses/cache.rs +++ b/src/pyclasses/cache.rs @@ -90,21 +90,25 @@ impl PyCache { nopolicy::Shared::new(maxsize, getsizeof), ); - if let Some(iterable) = iterable { - let getsizeof = wrapped.shared().getsizeof().clone_ref(py); - - let result = wrapped.extend( - // iterable object - iterable, - // transform function - |key, value| nopolicy::Handle::new(py, &getsizeof, key, value), - ); - self.0.set(wrapped); - result - } else { - self.0.set(wrapped); - Ok(()) - } + // Populate cache if `iterable` passed + let extend_result = { + if let Some(iterable) = iterable { + let getsizeof = wrapped.shared().getsizeof().clone_ref(py); + + let result = wrapped.extend( + // iterable object + iterable, + // transform function + |key, value| nopolicy::Handle::new(py, &getsizeof, key, value), + ); + result + } else { + Ok(()) + } + }; + + self.0.set(wrapped); + extend_result } #[getter] diff --git a/src/pyclasses/fifocache.rs b/src/pyclasses/fifocache.rs index bfc0970..f8be323 100644 --- a/src/pyclasses/fifocache.rs +++ b/src/pyclasses/fifocache.rs @@ -36,7 +36,7 @@ implement_pyclass! { /// ### Cons /// - Access-blind eviction. A hot item accessed thousands of times is evicted just as readily as one /// that has never been read. Hit rates suffer on workloads with strong temporal locality. - /// - The logical-index indirection adds a layer of internal complexity compared to a naïve queue-based cache. + /// - The logical-index indirection adds a layer of internal complexity compared to a naive queue-based cache. /// - The rare O(n) index rebase (triggered when `front_offset` nears `usize::MAX - isize::MAX`) introduces /// an occasional latency spike. Amortized cost is negligible, but worst-case latency is unbounded in principle. 
/// @@ -97,21 +97,25 @@ impl PyFIFOCache { fifopolicy::Shared::new(maxsize, getsizeof), ); - if let Some(iterable) = iterable { - let getsizeof = wrapped.shared().getsizeof().clone_ref(py); - - let result = wrapped.extend( - // iterable object - iterable, - // transform function - |key, value| fifopolicy::Handle::new(py, &getsizeof, key, value), - ); - self.0.set(wrapped); - result - } else { - self.0.set(wrapped); - Ok(()) - } + // Populate cache if `iterable` passed + let extend_result = { + if let Some(iterable) = iterable { + let getsizeof = wrapped.shared().getsizeof().clone_ref(py); + + let result = wrapped.extend( + // iterable object + iterable, + // transform function + |key, value| fifopolicy::Handle::new(py, &getsizeof, key, value), + ); + result + } else { + Ok(()) + } + }; + + self.0.set(wrapped); + extend_result } #[getter] diff --git a/src/pyclasses/lfucache.rs b/src/pyclasses/lfucache.rs index 24da082..095297a 100644 --- a/src/pyclasses/lfucache.rs +++ b/src/pyclasses/lfucache.rs @@ -113,21 +113,25 @@ impl PyLFUCache { lfupolicy::Shared::new(maxsize, getsizeof), ); - if let Some(iterable) = iterable { - let getsizeof = wrapped.shared().getsizeof().clone_ref(py); - - let result = wrapped.extend( - // iterable object - iterable, - // transform function - |key, value| lfupolicy::FrequencyHandle::new(py, &getsizeof, key, value, 1), - ); - self.0.set(wrapped); - result - } else { - self.0.set(wrapped); - Ok(()) - } + // Populate cache if `iterable` passed + let extend_result = { + if let Some(iterable) = iterable { + let getsizeof = wrapped.shared().getsizeof().clone_ref(py); + + let result = wrapped.extend( + // iterable object + iterable, + // transform function + |key, value| lfupolicy::FrequencyHandle::new(py, &getsizeof, key, value, 0), + ); + result + } else { + Ok(()) + } + }; + + self.0.set(wrapped); + extend_result } #[getter] @@ -239,7 +243,7 @@ impl PyLFUCache { ) -> pyo3::PyResult> { let inner = self.0.get(); let handle = - 
lfupolicy::FrequencyHandle::new(py, inner.shared().getsizeof(), key, value, 1)?; + lfupolicy::FrequencyHandle::new(py, inner.shared().getsizeof(), key, value, 0)?; let old_handle = inner.insert(py, handle)?.map(|x| x.into_value()); Ok(old_handle) @@ -262,7 +266,7 @@ impl PyLFUCache { // iterable object iterable.into_bound(py), // transform function - move |key, value| lfupolicy::FrequencyHandle::new(py, &getsizeof, key, value, 1), + move |key, value| lfupolicy::FrequencyHandle::new(py, &getsizeof, key, value, 0), ) } diff --git a/src/pyclasses/lrucache.rs b/src/pyclasses/lrucache.rs index 1a10f70..5c0d8fa 100644 --- a/src/pyclasses/lrucache.rs +++ b/src/pyclasses/lrucache.rs @@ -123,21 +123,25 @@ impl PyLRUCache { lrupolicy::Shared::new(maxsize, getsizeof), ); - if let Some(iterable) = iterable { - let getsizeof = wrapped.shared().getsizeof().clone_ref(py); - - let result = wrapped.extend( - // iterable object - iterable, - // transform function - |key, value| lrupolicy::Handle::new(py, &getsizeof, key, value), - ); - self.0.set(wrapped); - result - } else { - self.0.set(wrapped); - Ok(()) - } + // Populate cache if `iterable` passed + let extend_result = { + if let Some(iterable) = iterable { + let getsizeof = wrapped.shared().getsizeof().clone_ref(py); + + let result = wrapped.extend( + // iterable object + iterable, + // transform function + |key, value| lrupolicy::Handle::new(py, &getsizeof, key, value), + ); + result + } else { + Ok(()) + } + }; + + self.0.set(wrapped); + extend_result } #[getter] diff --git a/src/pyclasses/rrcache.rs b/src/pyclasses/rrcache.rs index 9e35f12..a3f10a5 100644 --- a/src/pyclasses/rrcache.rs +++ b/src/pyclasses/rrcache.rs @@ -95,21 +95,25 @@ impl PyRRCache { rrpolicy::Shared::new(maxsize, getsizeof), ); - if let Some(iterable) = iterable { - let getsizeof = wrapped.shared().getsizeof().clone_ref(py); - - let result = wrapped.extend( - // iterable object - iterable, - // transform function - |key, value| rrpolicy::Handle::new(py, 
&getsizeof, key, value), - ); - self.0.set(wrapped); - result - } else { - self.0.set(wrapped); - Ok(()) - } + // Populate cache if `iterable` passed + let extend_result = { + if let Some(iterable) = iterable { + let getsizeof = wrapped.shared().getsizeof().clone_ref(py); + + let result = wrapped.extend( + // iterable object + iterable, + // transform function + |key, value| rrpolicy::Handle::new(py, &getsizeof, key, value), + ); + result + } else { + Ok(()) + } + }; + + self.0.set(wrapped); + extend_result } #[getter] diff --git a/src/pyclasses/ttlcache.rs b/src/pyclasses/ttlcache.rs index 3587a22..0cc44b5 100644 --- a/src/pyclasses/ttlcache.rs +++ b/src/pyclasses/ttlcache.rs @@ -37,7 +37,7 @@ implement_pyclass! { /// /// ### Pros /// - Insert, lookup, and evict are all O(1) amortized: the `front_offset` trick eliminates the O(n) - /// index-shifting that a naïve implementation would require on every eviction. + /// index-shifting that a naive implementation would require on every eviction. /// - Entries expire automatically without any background thread or explicit invalidation call. /// Stale data is never returned to the caller. /// - TTL expiry and insertion-order eviction compose cleanly: the oldest entry is always evicted diff --git a/src/typeref.rs b/src/typeref.rs index af8168c..e671a43 100644 --- a/src/typeref.rs +++ b/src/typeref.rs @@ -1,4 +1,7 @@ +/// Raw pointer to the CPython `dict`, cached at initialization. pub static mut STD_DICT_TYPE: *mut pyo3::ffi::PyTypeObject = std::ptr::null_mut(); + +/// Raw pointer to the CPython `tuple`, cached at initialization. pub static mut STD_TUPLE_TYPE: *mut pyo3::ffi::PyTypeObject = std::ptr::null_mut(); #[inline(never)] @@ -17,6 +20,7 @@ fn _initialize_typeref(py: pyo3::Python) { } } +/// Initializes the cached CPython type object pointers. 
pub fn initialize_typeref(py: pyo3::Python) { static INIT: std::sync::Once = std::sync::Once::new(); From 6389dae5f31f798460825ac951d6d800f841963b Mon Sep 17 00:00:00 2001 From: awolverp Date: Tue, 26 May 2026 14:06:37 +0330 Subject: [PATCH 24/60] Support LFUCache.items_with_frequency --- cachebox/_core.pyi | 13 +++++++++++++ src/pyclasses/lfucache.rs | 34 +++++++++++++++++++++++++++++++++- 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/cachebox/_core.pyi b/cachebox/_core.pyi index fd98568..8371e70 100644 --- a/cachebox/_core.pyi +++ b/cachebox/_core.pyi @@ -1042,6 +1042,19 @@ class LFUCache(BaseCacheImpl[KT, VT]): """ ... + def items_with_frequency(self) -> typing.Iterable[typing.Tuple[KT, VT, int]]: + """ + Returns an ordered iterable of the cache's ``(key, value)`` pairs with their + frequency counter. + + Warning: + Do not modify the cache while iterating. + + Returns: + An iterable of ``(key, value, frequency)`` tuples in frequency order. + """ + ... + def peek( self, key: KT, diff --git a/src/pyclasses/lfucache.rs b/src/pyclasses/lfucache.rs index 095297a..3d43519 100644 --- a/src/pyclasses/lfucache.rs +++ b/src/pyclasses/lfucache.rs @@ -591,7 +591,32 @@ impl PyLFUCache { self.keys(py) } - // TODO: support items_with_frequency + fn items_with_frequency( + &self, + py: pyo3::Python, + ) -> pyo3::PyResult> { + let inner = self.0.get(); + + let mut policy = inner.policy(); + let heap_mut = policy.heap_mut(); + + // TODO: test this edge case + // We don't want to interrupt other iterators for no reason, + // so we call sort_by manually and only interrupt them on changes.
+ if heap_mut.sort_by(|x, y| x.frequency().cmp(&y.frequency())) { + inner.shared().generation_version().increment(); + } + + let gv = inner.shared().generation_version().clone(); + let initial_gv = gv.get(); + + let result = PyLFUCacheItemsWithFrequency { + iter: parking_lot::Mutex::new(heap_mut.iter(|x, y| x.frequency().cmp(&y.frequency()))), + gv, + initial_gv, + }; + pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + } fn copy(&self, py: pyo3::Python) -> pyo3::PyResult> { let inner = self.0.get(); @@ -769,6 +794,13 @@ implement_iterator!( (key.into(), val) }} + PyLFUCacheItemsWithFrequency as "lfucache_items_with_freq" + fn(py, handle) -> (alias::PyObject, alias::PyObject, u128) {{ + let freq = handle.frequency(); + let (key, val) = handle.clone_ref(py).into_pair(); + (key.into(), val, freq) + }} + PyLFUCacheKeys as "lfucache_keys" fn(py, handle) -> alias::PyObject { handle.key().clone_ref(py).into() } From 2cd088ace1fc33cc2695c3ccd70722bece7d20df Mon Sep 17 00:00:00 2001 From: awolverp Date: Tue, 26 May 2026 14:21:08 +0330 Subject: [PATCH 25/60] Remove the BaseIteratorImpl baseclass --- src/lib.rs | 2 -- src/pyclasses/base.rs | 16 +++------------- src/pyclasses/cache.rs | 9 ++++----- src/pyclasses/fifocache.rs | 9 ++++----- src/pyclasses/lfucache.rs | 11 +++++------ src/pyclasses/lrucache.rs | 9 ++++----- src/pyclasses/rrcache.rs | 9 ++++----- src/pyclasses/ttlcache.rs | 11 +++++------ 8 files changed, 29 insertions(+), 47 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index d068a58..729fd73 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,8 +23,6 @@ mod _core { #[pymodule_export] use crate::pyclasses::base::PyBaseCacheImpl; - #[pymodule_export] - use crate::pyclasses::base::PyBaseIteratorImpl; #[pymodule_export] use crate::pyclasses::cache::PyCache; diff --git a/src/pyclasses/base.rs b/src/pyclasses/base.rs index 43a8cf7..581c018 100644 --- a/src/pyclasses/base.rs +++ b/src/pyclasses/base.rs @@ -1,23 +1,13 @@ use 
crate::internal::alias; crate::implement_pyclass! { - /// Base implementation for cache classes in the cachebox library. + /// Base implementation for cache classes. /// - /// This abstract base class defines the generic structure for cache implementations, - /// supporting different key and value types through generic type parameters. - /// Serves as a foundation for specific cache variants like Cache and FIFOCache. + /// This abstract base class defines the generic structure for cache + /// implementations. #[derive(Debug, Default, Clone, Copy)] [subclass, generic, frozen] PyBaseCacheImpl as "BaseCacheImpl" ; } -crate::implement_pyclass! { - /// Base implementation for cache classes in the cachebox library. - /// - /// This abstract base class defines the generic structure for cache implementations, - /// supporting different key and value types through generic type parameters. - /// Serves as a foundation for specific cache variants like Cache and FIFOCache. - #[derive(Debug, Default, Clone, Copy)] - [subclass, generic, frozen] PyBaseIteratorImpl as "BaseIteratorImpl" ; -} #[pyo3::pymethods] impl PyBaseCacheImpl { diff --git a/src/pyclasses/cache.rs b/src/pyclasses/cache.rs index 30b99f3..8589b57 100644 --- a/src/pyclasses/cache.rs +++ b/src/pyclasses/cache.rs @@ -500,7 +500,7 @@ impl PyCache { gv, initial_gv, }; - pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + pyo3::Py::new(py, result) } fn values(&self, py: pyo3::Python) -> pyo3::PyResult> { @@ -514,7 +514,7 @@ impl PyCache { gv, initial_gv, }; - pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + pyo3::Py::new(py, result) } fn keys(&self, py: pyo3::Python) -> pyo3::PyResult> { @@ -528,7 +528,7 @@ impl PyCache { gv, initial_gv, }; - pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + pyo3::Py::new(py, result) } #[inline] @@ -608,8 +608,7 @@ macro_rules! implement_iterator { ) => { $( implement_pyclass! 
{ - [extends=crate::pyclasses::base::PyBaseIteratorImpl, generic, frozen] - $name as $pyname { + [generic, frozen] $name as $pyname { initial_gv: u32, gv: utils::GenerationVersion, iter: parking_lot::Mutex>, diff --git a/src/pyclasses/fifocache.rs b/src/pyclasses/fifocache.rs index f8be323..f0b1b14 100644 --- a/src/pyclasses/fifocache.rs +++ b/src/pyclasses/fifocache.rs @@ -505,7 +505,7 @@ impl PyFIFOCache { gv, initial_gv, }; - pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + pyo3::Py::new(py, result) } fn values(&self, py: pyo3::Python) -> pyo3::PyResult> { @@ -519,7 +519,7 @@ impl PyFIFOCache { gv, initial_gv, }; - pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + pyo3::Py::new(py, result) } fn keys(&self, py: pyo3::Python) -> pyo3::PyResult> { @@ -533,7 +533,7 @@ impl PyFIFOCache { gv, initial_gv, }; - pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + pyo3::Py::new(py, result) } #[inline] @@ -636,8 +636,7 @@ macro_rules! implement_iterator { ) => { $( implement_pyclass! 
{ - [extends=crate::pyclasses::base::PyBaseIteratorImpl, generic, frozen] - $name as $pyname { + [generic, frozen] $name as $pyname { initial_gv: u32, gv: utils::GenerationVersion, iter: parking_lot::Mutex>, diff --git a/src/pyclasses/lfucache.rs b/src/pyclasses/lfucache.rs index 3d43519..ec2cf4e 100644 --- a/src/pyclasses/lfucache.rs +++ b/src/pyclasses/lfucache.rs @@ -533,7 +533,7 @@ impl PyLFUCache { gv, initial_gv, }; - pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + pyo3::Py::new(py, result) } fn values(&self, py: pyo3::Python) -> pyo3::PyResult> { @@ -558,7 +558,7 @@ impl PyLFUCache { gv, initial_gv, }; - pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + pyo3::Py::new(py, result) } fn keys(&self, py: pyo3::Python) -> pyo3::PyResult> { @@ -583,7 +583,7 @@ impl PyLFUCache { gv, initial_gv, }; - pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + pyo3::Py::new(py, result) } #[inline] @@ -615,7 +615,7 @@ impl PyLFUCache { gv, initial_gv, }; - pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + pyo3::Py::new(py, result) } fn copy(&self, py: pyo3::Python) -> pyo3::PyResult> { @@ -749,8 +749,7 @@ macro_rules! implement_iterator { ) => { $( implement_pyclass! 
{ - [extends=crate::pyclasses::base::PyBaseIteratorImpl, generic, frozen] - $name as $pyname { + [generic, frozen] $name as $pyname { initial_gv: u32, gv: utils::GenerationVersion, iter: parking_lot::Mutex>, diff --git a/src/pyclasses/lrucache.rs b/src/pyclasses/lrucache.rs index 5c0d8fa..bd9235c 100644 --- a/src/pyclasses/lrucache.rs +++ b/src/pyclasses/lrucache.rs @@ -532,7 +532,7 @@ impl PyLRUCache { gv, initial_gv, }; - pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + pyo3::Py::new(py, result) } fn values(&self, py: pyo3::Python) -> pyo3::PyResult> { @@ -546,7 +546,7 @@ impl PyLRUCache { gv, initial_gv, }; - pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + pyo3::Py::new(py, result) } fn keys(&self, py: pyo3::Python) -> pyo3::PyResult> { @@ -560,7 +560,7 @@ impl PyLRUCache { gv, initial_gv, }; - pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + pyo3::Py::new(py, result) } #[inline] @@ -684,8 +684,7 @@ macro_rules! implement_iterator { ) => { $( implement_pyclass! 
{ - [extends=crate::pyclasses::base::PyBaseIteratorImpl, generic, frozen] - $name as $pyname { + [generic, frozen] $name as $pyname { initial_gv: u32, gv: utils::GenerationVersion, iter: parking_lot::Mutex>, diff --git a/src/pyclasses/rrcache.rs b/src/pyclasses/rrcache.rs index a3f10a5..f4fa6b6 100644 --- a/src/pyclasses/rrcache.rs +++ b/src/pyclasses/rrcache.rs @@ -505,7 +505,7 @@ impl PyRRCache { gv, initial_gv, }; - pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + pyo3::Py::new(py, result) } fn values(&self, py: pyo3::Python) -> pyo3::PyResult> { @@ -519,7 +519,7 @@ impl PyRRCache { gv, initial_gv, }; - pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + pyo3::Py::new(py, result) } fn keys(&self, py: pyo3::Python) -> pyo3::PyResult> { @@ -533,7 +533,7 @@ impl PyRRCache { gv, initial_gv, }; - pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + pyo3::Py::new(py, result) } #[inline] @@ -630,8 +630,7 @@ macro_rules! implement_iterator { ) => { $( implement_pyclass! 
{ - [extends=crate::pyclasses::base::PyBaseIteratorImpl, generic, frozen] - $name as $pyname { + [generic, frozen] $name as $pyname { initial_gv: u32, gv: utils::GenerationVersion, iter: parking_lot::Mutex>, diff --git a/src/pyclasses/ttlcache.rs b/src/pyclasses/ttlcache.rs index 0cc44b5..dab3667 100644 --- a/src/pyclasses/ttlcache.rs +++ b/src/pyclasses/ttlcache.rs @@ -568,7 +568,7 @@ impl PyTTLCache { gv, initial_gv, }; - pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + pyo3::Py::new(py, result) } fn values(&self, py: pyo3::Python) -> pyo3::PyResult> { @@ -585,7 +585,7 @@ impl PyTTLCache { gv, initial_gv, }; - pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + pyo3::Py::new(py, result) } fn keys(&self, py: pyo3::Python) -> pyo3::PyResult> { @@ -602,7 +602,7 @@ impl PyTTLCache { gv, initial_gv, }; - pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + pyo3::Py::new(py, result) } #[inline] @@ -798,7 +798,7 @@ impl PyTTLCache { gv, initial_gv, }; - pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseIteratorImpl)) + pyo3::Py::new(py, result) } fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { @@ -829,8 +829,7 @@ macro_rules! implement_iterator { ) => { $( implement_pyclass! { - [extends=crate::pyclasses::base::PyBaseIteratorImpl, generic, frozen] - $name as $pyname { + [generic, frozen] $name as $pyname { initial_gv: u32, gv: utils::GenerationVersion, iter: parking_lot::Mutex>, From 7acbf1028fccbd523dd54aea6bf9ca6da4a38fd8 Mon Sep 17 00:00:00 2001 From: awolverp Date: Tue, 26 May 2026 21:02:08 +0330 Subject: [PATCH 26/60] Refactor VTTLCache & improve performance `VTTLCache` class refactored ( some methods are missing yet ). `TTLCache` and `LFUCache` has performance improvements. 
--- cachebox/__init__.py | 1 + cachebox/_cachebox.py | 151 +++++++- cachebox/_core.pyi | 149 +++++++- src/internal/utils.rs | 44 ++- src/lib.rs | 3 + src/policies/common.rs | 4 +- src/policies/lfupolicy.rs | 46 ++- src/policies/mod.rs | 1 + src/policies/ttlpolicy.rs | 22 +- src/policies/vttlpolicy.rs | 503 +++++++++++++++++++++++++++ src/pyclasses/lfucache.rs | 91 ++--- src/pyclasses/mod.rs | 1 + src/pyclasses/ttlcache.rs | 105 +----- src/pyclasses/vttlcache.rs | 684 +++++++++++++++++++++++++++++++++++++ tests/test_impls.py | 36 ++ 15 files changed, 1648 insertions(+), 193 deletions(-) create mode 100644 src/policies/vttlpolicy.rs create mode 100644 src/pyclasses/vttlcache.rs diff --git a/cachebox/__init__.py b/cachebox/__init__.py index a4d4f68..46bb79f 100644 --- a/cachebox/__init__.py +++ b/cachebox/__init__.py @@ -5,6 +5,7 @@ from ._cachebox import LRUCache as LRUCache from ._cachebox import RRCache as RRCache from ._cachebox import TTLCache as TTLCache +from ._cachebox import VTTLCache as VTTLCache try: from ._core import ( diff --git a/cachebox/_cachebox.py b/cachebox/_cachebox.py index ada5cdb..13af0b4 100644 --- a/cachebox/_cachebox.py +++ b/cachebox/_cachebox.py @@ -1,7 +1,7 @@ import threading import time import typing -from datetime import timedelta +from datetime import datetime, timedelta from ._core import BaseCacheImpl as BaseCacheImpl from ._core import Cache as Cache @@ -12,6 +12,7 @@ # private import from ._core import TTLCache as _CoreTTLCache +from ._core import VTTLCache as _CoreVTTLCache if typing.TYPE_CHECKING: from ._core import _IterableType @@ -20,7 +21,7 @@ VT = typing.TypeVar("VT") -class TTLCache(_CoreTTLCache): +class TTLCache(_CoreTTLCache[KT, VT]): """ A cache with a Time-To-Live (TTL) eviction policy. @@ -174,3 +175,149 @@ def stop_sweeper(self) -> None: def __del__(self) -> None: self.stop_sweeper() + + +class VTTLCache(_CoreVTTLCache[KT, VT]): + """ + A cache with a Variable Time-To-Live (VTTL) eviction policy. 
+ + Each item can be inserted with its own individual TTL (time-to-live). When + an item's TTL expires, it is considered stale and will be evicted. Items + inserted without a TTL never expire and are only evicted when the cache + reaches capacity. + + Expiration is managed lazily by default: stale entries are not removed + immediately when they expire, but are cleaned up on the next access or + when the cache needs to reclaim capacity. Optionally, a ``sweep_interval`` + can be configured to spawn a background thread that proactively removes + expired items on a fixed schedule, bounding the window in which stale + data can be observed or memory held unnecessarily. + + Internally, a lazy-evaluated min-heap tracks expiration deadlines. The + heap is only fully sorted when needed (e.g. during eviction), keeping + insert costs low on average. A hash table stores cursors into the heap for + O(1) key lookups. A running total enables O(1) capacity checks. + + When the cache is full and eviction is needed, expired items are reclaimed + first (in expiration order, cheapest deadline first). If no expired items + exist, the item with the nearest upcoming expiration is evicted. Items with + no TTL are the last resort and are evicted only when all expiring items + have been exhausted. + + Pros: + - Per-item TTL control: each entry can have a different lifetime. + - Expired items are reclaimed before live items, maximising useful + capacity. + - Lazy expiry avoids background threads and timer overhead by default. + - Optional background sweeping bounds stale-data visibility and memory + retention when lazy eviction is insufficient. + - Insert, lookup, and evict are O(1) amortized (O(log n) worst-case + during heap rebalancing). + - TTL-free items coexist naturally alongside expiring ones. + + Cons: + - Without sweeping, stale items may linger in memory until the next + access or eviction pressure forces a cleanup. 
+ - With sweeping, a background thread is running for the lifetime of + the cache, adding concurrency overhead and requiring thread-safe + internal locking. + - Slightly higher per-insert cost compared to pure LRU/LFU. + - No guarantee on the exact eviction moment for expired items in lazy + mode; callers that require strict TTL enforcement should validate + timestamps on read, or configure a sufficiently short + ``sweep_interval``. + + Use ``VTTLCache`` when different items have different natural lifetimes + (e.g. session tokens, API responses with varying freshness requirements, + or multi-tier data with mixed staleness tolerances). Set + ``sweep_interval`` when bounded staleness or proactive memory reclamation + is required. + + Avoid it when all items share a uniform TTL (consider ``TTLCache`` instead), + when strict and immediate expiry is a hard requirement, or when memory pressure + from temporarily lingering stale entries is unacceptable and a background thread + is not an option. + """ + + def __init__( + self, + maxsize: int, + iterable: _IterableType[KT, VT] | None = None, + ttl: float | timedelta | datetime | None = None, + *, + capacity: int = 0, + getsizeof: typing.Callable[[KT, VT]] | None = None, + sweep_interval: float | timedelta | None = None, + ) -> None: + """ + Initializes a new TTLCache instance. + + Args: + maxsize: Maximum number of elements the cache can hold. If zero, + the limit is set to ``sys.maxsize`` internally. + iterable: Initial data to populate the cache. + ttl: Time-to-live duration for ``iterable`` items. This *is not* a global ttl. + capacity: Pre-allocate cache capacity to minimize reallocations. + Defaults to 0. + getsizeof: A callable that computes the size of a key-value pair. + When ``None``, each entry is assumed to have a size of 1 + (equivalent to ``lambda k, v: 1``). Use this to implement + weighted caching — for example, sizing entries by memory + footprint or byte length. 
+ sweep_interval: If set, starts a background thread that sweeps and + removes all expired entries on this interval (in seconds or as + a ``timedelta``). When ``None``, expiry is lazy. Defaults to + ``None``. Must be greater than or equal to 1. + + Note: + The cache can be pre-sized via ``capacity`` to reduce + reallocations when the number of expected entries is known + ahead of time. + + Raises: + ValueError: If ``sweep_interval`` is set to a value less than 1. + """ + super().__init__( + maxsize, + iterable, + ttl, + capacity=capacity, + getsizeof=getsizeof, + ) + + self._thread: threading.Thread | None = None + self._thread_is_running: bool = False + + if sweep_interval is not None: + if isinstance(sweep_interval, timedelta): + sweep_interval = sweep_interval.total_seconds() + + if sweep_interval < 1: + raise ValueError("sweep_interval must be more than 1 seconds.") + + self._thread_is_running = True + self._thread = threading.Thread( + target=self._sweeper_thread, + args=(sweep_interval,), + daemon=True, + ) + self._thread.start() + + self._sweep_interval = sweep_interval + + @property + def sweep_interval(self) -> float | None: + """The configured ``sweep_interval`` in seconds.""" + return self._sweep_interval + + def _sweeper_thread(self, interval: float): + while self._thread_is_running: + time.sleep(interval) + self.expire() + + def stop_sweeper(self) -> None: + """Signals the background sweeper thread to stop, if one is active.""" + self._thread_is_running = False + + def __del__(self) -> None: + self.stop_sweeper() diff --git a/cachebox/_core.pyi b/cachebox/_core.pyi index 8371e70..1246553 100644 --- a/cachebox/_core.pyi +++ b/cachebox/_core.pyi @@ -1,5 +1,5 @@ import typing -from datetime import timedelta +from datetime import datetime, timedelta from _typeshed import SupportsItems @@ -1326,3 +1326,150 @@ class TTLCache(BaseCacheImpl[KT, VT]): order, where ``remaining_ttl`` is in seconds. """ ... 
+ +class VTTLCache(BaseCacheImpl[KT, VT]): + """ + A cache with a Variable Time-To-Live (VTTL) eviction policy. + + Each item can be inserted with its own individual TTL (time-to-live). When + an item's TTL expires, it is considered stale and will be evicted. Items + inserted without a TTL never expire and are only evicted when the cache + reaches capacity. + """ + + def __init__( + self, + maxsize: int, + iterable: _IterableType[KT, VT] | None = None, + ttl: float | timedelta | datetime | None = None, + *, + capacity: int = 0, + getsizeof: typing.Callable[[KT, VT]] | None = None, + ) -> None: + """ + Initializes a new TTLCache instance. + + Args: + maxsize: Maximum number of elements the cache can hold. If zero, + the limit is set to ``sys.maxsize`` internally. + iterable: Initial data to populate the cache. + ttl: Time-to-live duration for ``iterable`` items. This *is not* a global ttl. + capacity: Pre-allocate cache capacity to minimize reallocations. + Defaults to 0. + getsizeof: A callable that computes the size of a key-value pair. + When ``None``, each entry is assumed to have a size of 1. + """ + ... + + def insert( + self, + key: KT, + value: VT, + ttl: float | timedelta | datetime | None = None, + ) -> typing.Optional[VT]: + """ + Insert a key-value pair into the cache with an optional time-to-live (TTL). + Returns the previous value associated with the key, if it existed. + + Args: + key: The key to insert or update. + value: The value to associate with ``key``. + ttl: An optional time-to-live duration for the item. + + Returns: + ``None`` if the key was not previously present; the old value if + the key already existed (the key itself is not updated). + """ + ... + + def update( + self, + iterable: _IterableType[KT, VT], + ttl: float | timedelta | datetime | None = None, + ) -> None: + """ + Updates the cache with elements from a dictionary or iterable of key-value pairs. 
+ + Args: + iterable: A dictionary, object supporting ``items()``, another + cache instance, or an iterable of ``(key, value)`` tuples. + ttl: An optional time-to-live duration for items. + """ + ... + + def setdefault( + self, + key: KT, + default: typing.Optional[DT] = None, + ttl: float | timedelta | datetime | None = None, + ) -> typing.Optional[VT | DT]: + """ + Inserts ``key`` with ``default`` as its value if the key is absent. + + Args: + key: The key to look up or insert. + default: The value to insert if ``key`` is not in the cache. + Defaults to ``None``. + ttl: An optional time-to-live duration for items. + + Returns: + The existing value if ``key`` is present, otherwise ``default``. + """ + ... + + def popitem(self) -> typing.Tuple[KT, VT]: + """ + Removes and returns the key-value pair that is closest to expiration. + + Returns: + A tuple containing the key and value of the removed item. + + Raises: + KeyError: If the cache is empty. + """ + + def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: + """ + Returns an ordered iterable of the cache's ``(key, value)`` pairs. + + Warning: + Do not modify the cache while iterating. + + Returns: + An iterable of ``(key, value)`` tuples in insertion order. + """ + ... + + def keys(self) -> typing.Iterable[KT]: + """ + Returns an ordered iterable of the cache's keys. + + Warning: + Do not modify the cache while iterating. + + Returns: + An iterable of keys in insertion order. + """ + ... + + def values(self) -> typing.Iterable[VT]: + """ + Returns an ordered iterable of the cache's values. + + Warning: + Do not modify the cache while iterating. + + Returns: + An iterable of values in insertion order. + """ + ... + + def expire(self, *, reuse: bool = False) -> None: + """ + Manually removes all expired key-value pairs from the cache. + + Args: + reuse: If ``True``, retains the allocated memory for future reuse + rather than freeing it. Defaults to ``False``. + """ + ... 
diff --git a/src/internal/utils.rs b/src/internal/utils.rs index c55764a..59d3954 100644 --- a/src/internal/utils.rs +++ b/src/internal/utils.rs @@ -222,19 +222,47 @@ impl<'a, 'py> pyo3::FromPyObject<'a, 'py> for OptionalArgument { } } +/// It can use as PyO3 function argument. Accepts Python `float`, `dateime.timedelta`, and `datetime.datetime`. #[derive(pyo3::FromPyObject)] -pub enum FloatOrTimedelta { +pub enum TimeToLiveArgument { Float(f64), - Timedelta(chrono::Duration), + Timedelta(chrono::TimeDelta), + Datetime(chrono::DateTime), } -impl From for f64 { - #[inline] - fn from(value: FloatOrTimedelta) -> Self { - match value { - FloatOrTimedelta::Float(x) => x, - FloatOrTimedelta::Timedelta(x) => x.as_seconds_f64(), +impl TimeToLiveArgument { + /// Consumes self and returns [`std::time::Duration`]. + #[inline(always)] + pub fn into_duration(self, datetime_allowed: bool) -> pyo3::PyResult { + self.into_seconds_f64(datetime_allowed) + .map(std::time::Duration::from_secs_f64) + } + + #[inline(always)] + pub fn into_seconds_f64(self, datetime_allowed: bool) -> pyo3::PyResult { + let seconds = match self { + Self::Float(x) => x, + Self::Timedelta(x) => x.as_seconds_f64(), + Self::Datetime(x) => { + if !datetime_allowed { + return Err(new_py_error!( + PyValueError, + "expected datetime.timedelta or float, got datetime.datetime" + )); + } else { + (chrono::Utc::now() - x).as_seconds_f64() + } + } + }; + + if seconds <= 0.0 { + return Err(new_py_error!( + PyValueError, + "time-to-live must be positive and non-zero" + )); } + + Ok(seconds) } } diff --git a/src/lib.rs b/src/lib.rs index 729fd73..63f283b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -78,6 +78,9 @@ mod _core { #[pymodule_export] use crate::pyclasses::ttlcache::PyTTLCacheValues; + #[pymodule_export] + use crate::pyclasses::vttlcache::PyVTTLCache; + #[pymodule_init] pub fn init(m: &pyo3::Bound<'_, pyo3::types::PyModule>) -> pyo3::PyResult<()> { typeref::initialize_typeref(m.py()); diff --git 
a/src/policies/common.rs b/src/policies/common.rs index ecb04af..d30d6a5 100644 --- a/src/policies/common.rs +++ b/src/policies/common.rs @@ -125,13 +125,13 @@ impl Shared { pub unsafe fn with_ttl( maxsize: usize, getsizeof: Option, - ttl: Option, + ttl: Option, ) -> Self { Self { maxsize: safe_non_zero!(maxsize), gv: utils::GenerationVersion::default(), getsizeof: utils::GetsizeofFunction::new(getsizeof), - global_ttl: ttl.map(std::time::Duration::from_secs_f64), + global_ttl: ttl, } } } diff --git a/src/policies/lfupolicy.rs b/src/policies/lfupolicy.rs index ba415a8..2f884c8 100644 --- a/src/policies/lfupolicy.rs +++ b/src/policies/lfupolicy.rs @@ -9,6 +9,12 @@ use crate::policies::traits::SharedExt; pub use crate::policies::common::Shared; +macro_rules! compare_fn { + () => { + |x, y| x.frequency.cmp(&y.frequency) + }; +} + #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] #[repr(transparent)] pub struct Frequency(u128); @@ -166,10 +172,7 @@ impl traits::OccupiedExt for Occupied<'_> { self.shared.generation_version().increment(); let (cursor, _) = unsafe { self.policy.table.remove(self.bucket) }; - let item = self - .policy - .heap - .remove(cursor, |x, y| x.frequency.cmp(&y.frequency)); + let item = self.policy.heap.remove(cursor, compare_fn!()); self.policy.currsize = self.policy.currsize.saturating_sub(item.size()); item @@ -248,8 +251,30 @@ impl LFUPolicy { } #[inline] - pub fn heap_mut(&mut self) -> &mut lazyheap::LazyHeap { - &mut self.heap + pub fn iter(&mut self, gv: &utils::GenerationVersion) -> lazyheap::RawIter { + // We don't want to intrupt other iterators with no reason + // so need to manually call sort_by to only intrupt them on changes. 
+ if self.heap.sort_by(compare_fn!()) { + gv.increment(); + } + + self.heap.iter(compare_fn!()) + } + + #[inline] + pub fn least_frequently_used( + &mut self, + py: pyo3::Python, + n: usize, + gv: &utils::GenerationVersion, + ) -> Option { + if self.heap.sort_by(compare_fn!()) { + gv.increment(); + } + + self.heap + .get(n) + .map(|cursor| unsafe { cursor.element().key().clone_ref(py) }) } #[inline] @@ -341,7 +366,7 @@ impl PolicyExt for LFUPolicy { { let front_cursor = self .heap - .front(|x, y| x.frequency.cmp(&y.frequency)) + .front(compare_fn!()) .ok_or_else(|| new_py_error!(PyKeyError, "cache is empty"))?; self.table @@ -353,10 +378,7 @@ impl PolicyExt for LFUPolicy { shared.generation_version().increment(); - let handle = self - .heap - .pop_front(|x, y| x.frequency.cmp(&y.frequency)) - .unwrap(); + let handle = self.heap.pop_front(compare_fn!()).unwrap(); self.currsize = self.currsize.saturating_sub(handle.size); Ok(handle) @@ -442,7 +464,7 @@ impl PolicyExt for LFUPolicy { let mut heap = lazyheap::LazyHeap::new(); unsafe { - for cursor in self.heap.iter(|x, y| x.frequency.cmp(&y.frequency)) { + for cursor in self.heap.iter(compare_fn!()) { let cloned_handle = cursor.element().clone_ref(py); let new_cursor = heap.push(cloned_handle); table.insert_no_grow(new_cursor.element().key().hash(), new_cursor); diff --git a/src/policies/mod.rs b/src/policies/mod.rs index aaf874c..2c62800 100644 --- a/src/policies/mod.rs +++ b/src/policies/mod.rs @@ -7,5 +7,6 @@ pub mod lrupolicy; pub mod nopolicy; pub mod rrpolicy; pub mod ttlpolicy; +pub mod vttlpolicy; pub mod wrapped; diff --git a/src/policies/ttlpolicy.rs b/src/policies/ttlpolicy.rs index e0d2fb6..dc21099 100644 --- a/src/policies/ttlpolicy.rs +++ b/src/policies/ttlpolicy.rs @@ -334,7 +334,7 @@ impl TTLPolicy { self.front_offset = 0; } - pub fn expire(&mut self, gv: &utils::GenerationVersion) -> pyo3::PyResult<()> { + pub fn expire(&mut self, gv: &utils::GenerationVersion) { let now = std::time::SystemTime::now(); 
while let Some(handle) = self.entries.front() { @@ -343,7 +343,12 @@ impl TTLPolicy { } let eq = |index: &usize| Ok::<_, pyo3::PyErr>((*index - self.front_offset) == 0); - if std::hint::unlikely(self.table.remove_entry(handle.key().hash(), eq)?.is_none()) { + if std::hint::unlikely( + self.table + .remove_entry(handle.key().hash(), eq) + .unwrap() + .is_none(), + ) { unreachable!("popitem key not found in table"); } @@ -354,19 +359,14 @@ impl TTLPolicy { self.currsize = self.currsize.saturating_sub(front.size()); self.decrement_indexes(1, self.entries.len()); } - - Ok(()) } #[inline] - pub fn iter( - &mut self, - shared: &Shared, - ) -> pyo3::PyResult> { - self.expire(shared.generation_version())?; + pub fn iter(&mut self, shared: &Shared) -> utils::RawVecDequeIter { + self.expire(shared.generation_version()); let (first, second) = self.entries.as_slices(); - Ok(utils::RawVecDequeIter::new(first, second)) + utils::RawVecDequeIter::new(first, second) } } @@ -419,7 +419,7 @@ impl PolicyExt for TTLPolicy { key: &::Key, shared: &'a Self::Shared, ) -> pyo3::PyResult, Self::Vacant<'a>>> { - self.expire(shared.generation_version())?; + self.expire(shared.generation_version()); let eq = |index: &usize| get_handle!(&self, *index).key().py_eq(py, key); match self.table.find(key.hash(), eq)? { diff --git a/src/policies/vttlpolicy.rs b/src/policies/vttlpolicy.rs new file mode 100644 index 0000000..9db4042 --- /dev/null +++ b/src/policies/vttlpolicy.rs @@ -0,0 +1,503 @@ +use crate::hashbrown; +use crate::internal::alias; +use crate::internal::lazyheap; +use crate::internal::utils; +use crate::policies::traits; +use crate::policies::traits::HandleExt; +use crate::policies::traits::PolicyExt; + +pub use crate::policies::common::Shared; +use crate::policies::traits::SharedExt; + +/// Compares two items by `expires_at`, placing `None` values last. +macro_rules! 
compare_fn { + () => { + |a, b| { + a.expires_at + .is_none() + .cmp(&b.expires_at.is_none()) + .then_with(|| a.expires_at.cmp(&b.expires_at)) + } + }; +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub enum ExpiresAt { + SystemTime(std::time::SystemTime), + Duration(std::time::Duration), +} + +impl From for ExpiresAt { + #[inline] + fn from(value: f64) -> Self { + Self::Duration(std::time::Duration::from_secs_f64(value)) + } +} + +impl From for std::time::SystemTime { + #[inline] + fn from(value: ExpiresAt) -> Self { + match value { + ExpiresAt::Duration(x) => std::time::SystemTime::now() + x, + ExpiresAt::SystemTime(x) => x, + } + } +} + +/// A key-value pair with a precomputed hash and combined size. +pub struct ExpiringHandle { + /// The cache key together with its precomputed hash, avoiding repeated + /// Python hash calls during table lookups. + key: utils::PrecomputedHashObject, + /// The cached value associated with this key. + value: alias::PyObject, + /// Size of the key and value as reported by `getsizeof`. + size: usize, + /// Configured ttl for handle. `None` means has no ttl. + expires_at: Option, +} + +impl ExpiringHandle { + /// Creates a new [`Handle`], which calculates the precomputed hash itself. + #[inline] + pub fn new( + py: pyo3::Python<'_>, + getsizeof: &utils::GetsizeofFunction, + expires_at: Option, + key: alias::PyObject, + value: alias::PyObject, + ) -> pyo3::PyResult { + Self::with_precomputed_hash_key( + py, + getsizeof, + expires_at, + utils::PrecomputedHashObject::new(py, key)?, + value, + ) + } + + /// Creates a new [`Handle`] from an already-hashed key. + /// + /// Prefer this over [`Handle::new`] when the caller has already paid the cost + /// of computing the hash (e.g. during a table lookup that preceded insertion). 
+ #[inline] + pub fn with_precomputed_hash_key( + py: pyo3::Python<'_>, + getsizeof: &utils::GetsizeofFunction, + expires_at: Option, + key: utils::PrecomputedHashObject, + value: alias::PyObject, + ) -> pyo3::PyResult { + let size = getsizeof.call(py, key.as_ref(), &value)?; + Ok(Self { + key, + value, + size, + expires_at: expires_at.map(Into::into), + }) + } + + /// Consumes `self` and returns the [`utils::PrecomputedHashObject`]. + #[inline] + pub fn into_key(self) -> utils::PrecomputedHashObject { + self.key + } + + /// Returns a reference to the value. + #[inline] + pub fn value(&self) -> &alias::PyObject { + &self.value + } + + /// Consumes `self` and returns the value of the pair. + #[inline] + pub fn into_value(self) -> alias::PyObject { + self.value + } + + /// Consumes `self` and returns the pair. + #[inline] + pub fn into_pair(self) -> (utils::PrecomputedHashObject, alias::PyObject) { + (self.key, self.value) + } + + #[inline] + pub fn expires_at(&self) -> Option { + self.expires_at + } + + #[inline] + pub fn is_expired(&self, now: std::time::SystemTime) -> bool { + self.expires_at.map(|x| x <= now).unwrap_or_default() + } + + /// Makes a clone of self. + /// + /// This creates another pointer to the same object, increasing its reference count. + #[inline] + pub fn clone_ref(&self, py: pyo3::Python<'_>) -> Self { + Self { + key: self.key.clone_ref(py), + value: self.value.clone_ref(py), + size: self.size, + expires_at: self.expires_at, + } + } +} + +impl HandleExt for ExpiringHandle { + type Key = utils::PrecomputedHashObject; + + #[inline(always)] + fn key(&self) -> &utils::PrecomputedHashObject { + &self.key + } + + #[inline(always)] + fn size(&self) -> usize { + self.size + } +} + +/// A view into an occupied entry in [`VTTLPolicy`]. +pub struct Occupied<'a> { + /// The parent storage that owns the hash table. + policy: &'a mut VTTLPolicy, + /// The shared configuration + shared: &'a Shared, + /// Raw bucket pointing to the occupied index. 
+ bucket: hashbrown::raw::Bucket>, +} + +impl traits::OccupiedExt for Occupied<'_> { + type Handle = ExpiringHandle; + type Shared = Shared; + + fn replace(self, new: Self::Handle) -> Self::Handle { + // Here we don't need to increment generation version + // self.shared.generation_version().increment(); + + unsafe { + let cursor = self.bucket.as_mut(); + + self.policy.currsize = self + .policy + .currsize + .saturating_sub(cursor.element().size()) + .saturating_add(new.size()); + + let old = std::mem::replace(cursor.element_mut(), new); + + self.policy.heap.mark_unsorted(); + old + } + } + + #[inline] + fn remove(self) -> Self::Handle { + self.shared.generation_version().increment(); + + let (cursor, _) = unsafe { self.policy.table.remove(self.bucket) }; + let item = self.policy.heap.remove(cursor, compare_fn!()); + + self.policy.currsize = self.policy.currsize.saturating_sub(item.size()); + item + } +} +/// A view into a vacant slot in [`VTTLPolicy`]. +pub struct Vacant<'a> { + /// The parent policy that owns the hash table. 
+ policy: &'a mut VTTLPolicy, + /// The shared configuration + shared: &'a Shared, +} + +impl traits::VacantExt for Vacant<'_> { + type Handle = ExpiringHandle; + type Shared = Shared; + + #[inline] + fn would_exceed(&self, extra_size: usize) -> bool { + self.policy.currsize.saturating_add(extra_size) > self.shared.maxsize() + } + + #[inline] + fn evict(&mut self) -> pyo3::PyResult<()> { + self.policy.evict(self.shared)?; + Ok(()) + } + + fn insert(self, handle: Self::Handle) { + self.shared.generation_version().increment(); + + self.policy.currsize = self.policy.currsize.saturating_add(handle.size()); + + let hash = handle.key().hash(); + let cursor = self.policy.heap.push(handle); + + self.policy + .table + .insert(hash, cursor, |x| unsafe { x.element().key().hash() }); + } +} + +pub struct VTTLPolicy { + // Fields are same as `LFUPolicy` + table: hashbrown::raw::RawTable>, + heap: lazyheap::LazyHeap, + currsize: usize, +} + +impl VTTLPolicy { + /// Creates a new [`VTTLPolicy`]. + /// + /// The underlying hash map is pre-allocated to hold at least `capacity` entries + /// without reallocation. + pub fn new(capacity: usize) -> Self { + Self { + table: hashbrown::raw::RawTable::with_capacity(capacity), + heap: lazyheap::LazyHeap::new(), + currsize: 0, + } + } + + #[inline] + pub fn table(&self) -> &hashbrown::raw::RawTable> { + &self.table + } + + #[inline] + pub fn heap(&self) -> &lazyheap::LazyHeap { + &self.heap + } + + #[inline] + pub fn iter(&mut self, gv: &utils::GenerationVersion) -> lazyheap::RawIter { + self.expire(gv); + + // We don't want to interrupt other iterators with no reason + // so need to manually call sort_by to only interrupt them on changes. 
+ if self.heap.sort_by(compare_fn!()) { + gv.increment(); + } + + self.heap.iter(compare_fn!()) + } + + pub fn expire(&mut self, gv: &utils::GenerationVersion) { + let now = std::time::SystemTime::now(); + + while let Some(cursor) = self.heap.front(compare_fn!()) { + let handle = unsafe { cursor.element() }; + + if !handle.is_expired(now) { + break; + } + + self.table + .remove_entry(handle.key.hash(), |x| { + Ok::<_, pyo3::PyErr>(x.as_ptr() == cursor.as_ptr()) + }) + .unwrap(); + + drop(cursor); + + gv.increment(); + + let handle = self.heap.pop_front(compare_fn!()).unwrap(); + self.currsize = self.currsize.saturating_sub(handle.size); + } + } +} + +impl PolicyExt for VTTLPolicy { + type Shared = Shared; + type Handle = ExpiringHandle; + + type Occupied<'a> + = Occupied<'a> + where + Self: 'a; + + type Vacant<'a> + = Vacant<'a> + where + Self: 'a; + + #[inline] + fn current_size(&self) -> usize { + self.currsize + } + + #[inline] + fn get( + &mut self, + py: pyo3::Python, + key: &::Key, + ) -> pyo3::PyResult> { + let cursor = self + .table + .get_mut(key.hash(), |x| unsafe { key.py_eq(py, &x.element().key) })?; + + match cursor { + Some(cursor) => { + let handle = unsafe { cursor.element() }; + + if handle.is_expired(std::time::SystemTime::now()) { + Ok(None) + } else { + Ok(Some(handle)) + } + } + None => Ok(None), + } + } + + fn entry<'a>( + &'a mut self, + py: pyo3::Python, + key: &::Key, + shared: &'a Self::Shared, + ) -> pyo3::PyResult, Self::Vacant<'a>>> { + self.expire(shared.generation_version()); + + let eq = |cursor: &lazyheap::Cursor| unsafe { + key.py_eq(py, cursor.element().key()) + }; + + match self.table.find(key.hash(), eq)? 
{ + Some(bucket) => { + let result = Occupied { + policy: self, + shared, + bucket, + }; + Ok(traits::PolicyEntry::Occupied(result)) + } + None => { + let result = Vacant { + policy: self, + shared, + }; + Ok(traits::PolicyEntry::Vacant(result)) + } + } + } + + fn evict(&mut self, shared: &Self::Shared) -> pyo3::PyResult { + { + let front_cursor = self + .heap + .front(compare_fn!()) + .ok_or_else(|| new_py_error!(PyKeyError, "cache is empty"))?; + + self.table + .remove_entry(unsafe { front_cursor.element().key.hash() }, |x| { + Ok::<_, pyo3::PyErr>(std::ptr::eq(front_cursor.as_ptr(), x.as_ptr())) + })? + .expect("evict: item not found in table"); + } + + shared.generation_version().increment(); + + let handle = self.heap.pop_front(compare_fn!()).unwrap(); + + self.currsize = self.currsize.saturating_sub(handle.size); + Ok(handle) + } + + fn clear(&mut self, shared: &Self::Shared) { + if self.heap.is_empty() { + return; + } + + shared.generation_version().increment(); + self.table.clear_no_drop(); + self.heap.clear(); + self.currsize = 0; + } + + fn shrink_to_fit(&mut self, shared: &Self::Shared) { + shared.generation_version().increment(); + + self.table + .shrink_to(0, |x| unsafe { x.element().key.hash() }); + + self.heap.shrink_to_fit(); + } + + // TODO: considering expired handles + fn py_eq( + &self, + py: pyo3::Python, + shared: &Self::Shared, + other: &Self, + other_shared: &Self::Shared, + ) -> pyo3::PyResult { + if shared.maxsize() != other_shared.maxsize() || self.table.len() != other.table.len() { + return Ok(false); + } + + let mut error = None; + let result = unsafe { + let mut iterator = self.table.iter().map(|x| x.as_ref()); + + iterator.all(|cursor_1| { + let handle_1 = cursor_1.element(); + + let result = other.table.get(handle_1.key().hash(), |cursor| { + handle_1.key().py_eq(py, cursor.element().key()) + }); + + match result { + Err(e) => { + error = Some(e); + // Return false to break the `.all` loop + false + } + Ok(None) => false, + 
Ok(Some(cursor_2)) => { + let handle_2 = cursor_2.element(); + + match utils::pyobject_equal( + py, + handle_1.value.as_ptr(), + handle_2.value.as_ptr(), + ) { + Ok(result) => result, + Err(e) => { + error = Some(e); + // Return false to break the `.all` loop + false + } + } + } + } + }) + }; + + if let Some(error) = error { + return Err(error); + } + Ok(result) + } + + fn clone_ref(&mut self, py: pyo3::Python) -> Self { + let mut table = hashbrown::raw::RawTable::with_capacity(self.table.len()); + let mut heap = lazyheap::LazyHeap::new(); + + unsafe { + for cursor in self.heap.iter(compare_fn!()) { + let cloned_handle = cursor.element().clone_ref(py); + let new_cursor = heap.push(cloned_handle); + table.insert_no_grow(new_cursor.element().key().hash(), new_cursor); + } + } + + Self { + table, + heap, + currsize: self.currsize, + } + } +} diff --git a/src/pyclasses/lfucache.rs b/src/pyclasses/lfucache.rs index ec2cf4e..1e188d3 100644 --- a/src/pyclasses/lfucache.rs +++ b/src/pyclasses/lfucache.rs @@ -514,74 +514,45 @@ impl PyLFUCache { fn items(&self, py: pyo3::Python) -> pyo3::PyResult> { let inner = self.0.get(); - let mut policy = inner.policy(); - let heap_mut = policy.heap_mut(); - - // TODO: test this edge case - // We don't want to intrupt other iterators with no reason - // so need to manually call sort_by to only intrupt them on changes. 
- if heap_mut.sort_by(|x, y| x.frequency().cmp(&y.frequency())) { - inner.shared().generation_version().increment(); - } - let gv = inner.shared().generation_version().clone(); - let initial_gv = gv.get(); + let gv = inner.shared().generation_version(); + let iter = policy.iter(gv); let result = PyLFUCacheItems { - iter: parking_lot::Mutex::new(heap_mut.iter(|x, y| x.frequency().cmp(&y.frequency()))), - gv, - initial_gv, + iter: parking_lot::Mutex::new(iter), + gv: gv.clone(), + initial_gv: gv.get(), }; pyo3::Py::new(py, result) } fn values(&self, py: pyo3::Python) -> pyo3::PyResult> { let inner = self.0.get(); - let mut policy = inner.policy(); - let heap_mut = policy.heap_mut(); - // TODO: test this edge case - // We don't want to intrupt other iterators with no reason - // so need to manually call sort_by to only intrupt them on changes. - if heap_mut.sort_by(|x, y| x.frequency().cmp(&y.frequency())) { - inner.shared().generation_version().increment(); - } - - let gv = inner.shared().generation_version().clone(); - let initial_gv = gv.get(); + let gv = inner.shared().generation_version(); + let iter = policy.iter(gv); - // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] let result = PyLFUCacheValues { - iter: parking_lot::Mutex::new(heap_mut.iter(|x, y| x.frequency().cmp(&y.frequency()))), - gv, - initial_gv, + iter: parking_lot::Mutex::new(iter), + gv: gv.clone(), + initial_gv: gv.get(), }; pyo3::Py::new(py, result) } fn keys(&self, py: pyo3::Python) -> pyo3::PyResult> { let inner = self.0.get(); - let mut policy = inner.policy(); - let heap_mut = policy.heap_mut(); - - // TODO: test this edge case - // We don't want to intrupt other iterators with no reason - // so need to manually call sort_by to only intrupt them on changes. 
- if heap_mut.sort_by(|x, y| x.frequency().cmp(&y.frequency())) { - inner.shared().generation_version().increment(); - } - let gv = inner.shared().generation_version().clone(); - let initial_gv = gv.get(); + let gv = inner.shared().generation_version(); + let iter = policy.iter(gv); - // SAFETY: We cannot use lifetimes here, but we're tracking changes using [`GenerationVersion`] let result = PyLFUCacheKeys { - iter: parking_lot::Mutex::new(heap_mut.iter(|x, y| x.frequency().cmp(&y.frequency()))), - gv, - initial_gv, + iter: parking_lot::Mutex::new(iter), + gv: gv.clone(), + initial_gv: gv.get(), }; pyo3::Py::new(py, result) } @@ -596,24 +567,15 @@ impl PyLFUCache { py: pyo3::Python, ) -> pyo3::PyResult> { let inner = self.0.get(); - let mut policy = inner.policy(); - let heap_mut = policy.heap_mut(); - - // TODO: test this edge case - // We don't want to intrupt other iterators with no reason - // so need to manually call sort_by to only intrupt them on changes. - if heap_mut.sort_by(|x, y| x.frequency().cmp(&y.frequency())) { - inner.shared().generation_version().increment(); - } - let gv = inner.shared().generation_version().clone(); - let initial_gv = gv.get(); + let gv = inner.shared().generation_version(); + let iter = policy.iter(gv); let result = PyLFUCacheItemsWithFrequency { - iter: parking_lot::Mutex::new(heap_mut.iter(|x, y| x.frequency().cmp(&y.frequency()))), - gv, - initial_gv, + iter: parking_lot::Mutex::new(iter), + gv: gv.clone(), + initial_gv: gv.get(), }; pyo3::Py::new(py, result) } @@ -704,17 +666,8 @@ impl PyLFUCache { return Err(new_py_error!(PyIndexError, "`n` out of range")); } - let heap_mut = policy.heap_mut(); - - if heap_mut.sort_by(|x, y| x.frequency().cmp(&y.frequency())) { - inner.shared().generation_version().increment(); - } - - match heap_mut.get(n as usize) { - Some(handle) => unsafe { - let element = handle.element(); - Ok(element.key().as_ref().clone_ref(py)) - }, + match policy.least_frequently_used(py, n as usize, 
inner.shared().generation_version()) { + Some(key) => Ok(key.into()), None => Err(new_py_error!(PyIndexError, "`n` out of range")), } } diff --git a/src/pyclasses/mod.rs b/src/pyclasses/mod.rs index 70785ed..56cf742 100644 --- a/src/pyclasses/mod.rs +++ b/src/pyclasses/mod.rs @@ -5,3 +5,4 @@ pub mod lfucache; pub mod lrucache; pub mod rrcache; pub mod ttlcache; +pub mod vttlcache; diff --git a/src/pyclasses/ttlcache.rs b/src/pyclasses/ttlcache.rs index dab3667..29d7e1a 100644 --- a/src/pyclasses/ttlcache.rs +++ b/src/pyclasses/ttlcache.rs @@ -9,71 +9,8 @@ use crate::policies::wrapped::Wrapped; implement_pyclass! { /// A Time-To-Live (TTL) cache eviction policy: each entry carries an expiration timestamp - /// and is considered stale — and eligible for eviction — once that deadline has passed, + /// and is considered stale — and eligible for eviction - once that deadline has passed, /// regardless of how recently or frequently it was accessed. - /// - /// ## How It Works - /// The TTL algorithm pairs time-based expiration with insertion-order eviction. Every entry - /// is stamped with an absolute `expires_at` timestamp at insertion time (computed as - /// `now + global_ttl`). Entries are stored in insertion order, and eviction proceeds from the - /// front of that queue — but only after confirming the candidate has actually expired. A live - /// entry at the front of the queue blocks eviction of everything behind it, so the cache may - /// temporarily exceed capacity if the oldest entries are still fresh. - /// - /// Like `FIFOPolicy`, this implementation backs the queue with a `double-ended queue` for O(1) - /// front removal and a `hash map` for O(1) key lookups. The same logical-index trick applies: - /// the table stores monotonically increasing counters rather than physical deque positions, and - /// a `front_offset` counter converts a logical index back to a physical one at read time via - /// `entries[table[key] - front_offset]`. 
This keeps eviction and lookup O(1) without rewriting - /// the table on every eviction. On top of that, every read checks `expires_at` against the current wall-clock time and - /// treats any expired entry as a cache miss. - /// - /// Without `sweep_interval`, an expiry sweep is triggered automatically on every call to - /// `insert`, `update`, `current_size`, `remaining_size`, `last`, `first`, `items`, `keys`, - /// `values`, and `__iter__`. A completely idle cache will accumulate stale entries between - /// these calls, but any normal interaction with the cache is sufficient to reclaim them. - /// When `sweep_interval` is set, a background Rust thread performs the sweep on that interval - /// instead, reclaiming expired entries independent of any method calls. - /// - /// ### Pros - /// - Insert, lookup, and evict are all O(1) amortized: the `front_offset` trick eliminates the O(n) - /// index-shifting that a naive implementation would require on every eviction. - /// - Entries expire automatically without any background thread or explicit invalidation call. - /// Stale data is never returned to the caller. - /// - TTL expiry and insertion-order eviction compose cleanly: the oldest entry is always evicted - /// first among those that have already expired. - /// - A single `global_ttl` keeps configuration simple; every entry ages at the same rate. - /// - /// ### Cons - /// - /// - Wall-clock dependency. Correctness relies on a monotonically advancing system clock. - /// Clock adjustments (NTP steps, suspend/resume) can cause entries to expire earlier or later - /// than intended. - /// - When `sweep_interval` is set, a background thread wakes on that interval to sweep and - /// remove all expired entries. This adds a small amount of background CPU usage and - /// introduces a reaper thread for the lifetime of the cache. - /// - No per-entry TTL override. All entries share `global_ttl`; mixed expiry requirements need - /// a different policy or a wrapper layer. 
- /// - The rare O(n) index rebase (triggered when `front_offset` nears `usize::MAX - isize::MAX`) - /// introduces an occasional latency spike. Amortized cost is negligible, but worst-case - /// latency is unbounded in principle. - /// - /// ## When to use it - /// Reach for `TTLPolicy` when: - /// - Cached data has a natural freshness window: API responses, auth tokens, DNS records, - /// rate-limit counters, or any value that becomes incorrect or unsafe after a known interval. - /// - You need automatic expiry without a background reaper thread — expiry sweeps on common - /// method calls are sufficient, or you want continuous reclamation via `sweep_interval`. - /// - Access patterns are unpredictable or uniform enough that recency- or frequency-based - /// eviction (LRU/LFU) would offer no meaningful advantage. - /// - /// Avoid it when: - /// - Your workload has strong temporal locality and you need a best-effort hit rate policy — - /// LRU will serve you better. - /// - Per-entry TTL granularity is required. If different keys need different lifetimes, - /// consider a policy that accepts per-insertion expiry hints. - /// - Your environment has an unreliable or adjustable system clock, where wall-clock-based - /// expiry may behave unexpectedly. 
[subclass, extends=crate::pyclasses::base::PyBaseCacheImpl, generic, frozen] PyTTLCache as "TTLCache" (onceinit::OnceInit>); } @@ -112,18 +49,12 @@ impl PyTTLCache { &self, py: pyo3::Python, maxsize: usize, - global_ttl: utils::FloatOrTimedelta, + global_ttl: utils::TimeToLiveArgument, iterable: Option, capacity: usize, getsizeof: Option, ) -> pyo3::PyResult<()> { - let global_ttl: f64 = global_ttl.into(); - if global_ttl <= 0.0 { - return Err(new_py_error!( - PyValueError, - "global_ttl must be positive and non-zero" - )); - } + let global_ttl = global_ttl.into_duration(false)?; let wrapped = Wrapped::new(ttlpolicy::TTLPolicy::new(capacity), unsafe { ttlpolicy::Shared::with_ttl(maxsize, getsizeof, Some(global_ttl)) @@ -159,22 +90,21 @@ impl PyTTLCache { } #[inline] - fn current_size(&self) -> pyo3::PyResult { + fn current_size(&self) -> usize { let inner = self.0.get(); let mut policy = inner.policy(); - policy.expire(inner.shared().generation_version())?; - Ok(policy.current_size()) + policy.expire(inner.shared().generation_version()); + policy.current_size() } #[inline] - fn remaining_size(&self) -> pyo3::PyResult { + fn remaining_size(&self) -> usize { let inner = self.0.get(); { let mut policy = inner.policy(); - policy.expire(inner.shared().generation_version())?; + policy.expire(inner.shared().generation_version()); } - - Ok(inner.remaining_size()) + inner.remaining_size() } #[getter] @@ -557,7 +487,7 @@ impl PyTTLCache { fn items(&self, py: pyo3::Python) -> pyo3::PyResult> { let inner = self.0.get(); - let iter = inner.policy().iter(inner.shared())?; + let iter = inner.policy().iter(inner.shared()); let gv = inner.shared().generation_version().clone(); let initial_gv = gv.get(); @@ -574,7 +504,7 @@ impl PyTTLCache { fn values(&self, py: pyo3::Python) -> pyo3::PyResult> { let inner = self.0.get(); - let iter = inner.policy().iter(inner.shared())?; + let iter = inner.policy().iter(inner.shared()); let gv = inner.shared().generation_version().clone(); let 
initial_gv = gv.get(); @@ -591,7 +521,7 @@ impl PyTTLCache { fn keys(&self, py: pyo3::Python) -> pyo3::PyResult> { let inner = self.0.get(); - let iter = inner.policy().iter(inner.shared())?; + let iter = inner.policy().iter(inner.shared()); let gv = inner.shared().generation_version().clone(); let initial_gv = gv.get(); @@ -654,17 +584,16 @@ impl PyTTLCache { #[inline] #[pyo3(signature=(*, reuse=false))] - fn expire(&self, reuse: bool) -> pyo3::PyResult<()> { + fn expire(&self, reuse: bool) { let inner = self.0.get(); let shared = inner.shared(); let mut policy = inner.policy(); - policy.expire(shared.generation_version())?; + policy.expire(shared.generation_version()); if !reuse { policy.shrink_to_fit(shared); } - Ok(()) } #[pyo3(signature = (n=0))] @@ -676,7 +605,7 @@ impl PyTTLCache { let inner = self.0.get(); let mut policy = inner.policy(); - policy.expire(inner.shared().generation_version())?; + policy.expire(inner.shared().generation_version()); if n < 0 { n += policy.entries().len() as isize; @@ -695,7 +624,7 @@ impl PyTTLCache { let inner = self.0.get(); let mut policy = inner.policy(); - policy.expire(inner.shared().generation_version())?; + policy.expire(inner.shared().generation_version()); match policy.entries().back() { Some(handle) => Ok(handle.key().as_ref().clone_ref(py)), @@ -787,7 +716,7 @@ impl PyTTLCache { ) -> pyo3::PyResult> { let inner = self.0.get(); - let iter = inner.policy().iter(inner.shared())?; + let iter = inner.policy().iter(inner.shared()); let gv = inner.shared().generation_version().clone(); let initial_gv = gv.get(); diff --git a/src/pyclasses/vttlcache.rs b/src/pyclasses/vttlcache.rs new file mode 100644 index 0000000..7cfdf84 --- /dev/null +++ b/src/pyclasses/vttlcache.rs @@ -0,0 +1,684 @@ +use crate::internal::alias; +use crate::internal::lazyheap; +use crate::internal::onceinit; +use crate::internal::utils; +use crate::policies::traits::HandleExt; +use crate::policies::traits::PolicyExt; +use 
crate::policies::traits::SharedExt; +use crate::policies::vttlpolicy; +use crate::policies::wrapped::Wrapped; + +implement_pyclass! { + /// A cache with a Variable Time-To-Live (VTTL) eviction policy. + /// + /// Each item can be inserted with its own individual TTL (time-to-live). When + /// an item's TTL expires, it is considered stale and will be evicted. Items + /// inserted without a TTL never expire and are only evicted when the cache + /// reaches capacity. + [subclass, extends=crate::pyclasses::base::PyBaseCacheImpl, generic, frozen] + PyVTTLCache as "VTTLCache" (onceinit::OnceInit>); +} + +#[pyo3::pymethods] +impl PyVTTLCache { + #[new] + #[allow(unused_variables)] + #[pyo3(signature=(*args, **kwds))] + fn __new__( + args: alias::ArgsType, + kwds: Option, + ) -> (Self, crate::pyclasses::base::PyBaseCacheImpl) { + ( + Self(onceinit::OnceInit::uninit()), + crate::pyclasses::base::PyBaseCacheImpl, + ) + } + + /// Initialize a new `VTTLCache` instance. + /// + /// Args: + /// maxsize: Maximum number of elements the cache can hold. + /// iterable: Initial data to populate the cache. + /// ttl: Time-to-live duration for `iterable` items. This *is not* a global ttl. + /// capacity: Pre-allocate capacity to minimize reallocations. Defaults to 0. + /// getsizeof: A callable that computes the size of a key-value pair. When `None`, each + /// entry is assumed to have a size of 1 (equivalent to `lambda k, v: 1`). + /// Use this to implement weighted caching — for example, sizing entries by + /// memory footprint or byte length. + /// + /// The cache can be pre-sized via `capacity` to reduce hash table reallocations when + /// the number of expected entries is known ahead of time. 
+ #[pyo3(signature=(maxsize, iterable=None, ttl=None, *, capacity=0, getsizeof=None))] + fn __init__( + &self, + py: pyo3::Python, + maxsize: usize, + iterable: Option, + ttl: Option, + capacity: usize, + getsizeof: Option, + ) -> pyo3::PyResult<()> { + let wrapped = Wrapped::new( + vttlpolicy::VTTLPolicy::new(capacity), + vttlpolicy::Shared::new(maxsize, getsizeof), + ); + + // Populate cache if `iterable` passed + let extend_result = { + if let Some(iterable) = iterable { + let ttl: Option = match ttl { + Some(x) => Some(x.into_seconds_f64(true)?.into()), + None => None, + }; + + let getsizeof = wrapped.shared().getsizeof().clone_ref(py); + + let result = wrapped.extend( + // iterable object + iterable, + // transform function + |key, value| vttlpolicy::ExpiringHandle::new(py, &getsizeof, ttl, key, value), + ); + result + } else { + Ok(()) + } + }; + + self.0.set(wrapped); + extend_result + } + + #[getter] + #[inline] + fn maxsize(&self) -> usize { + let inner = self.0.get(); + inner.shared().maxsize() + } + + #[inline] + fn current_size(&self) -> usize { + let inner = self.0.get(); + let mut policy = inner.policy(); + policy.expire(inner.shared().generation_version()); + policy.current_size() + } + + #[inline] + fn remaining_size(&self) -> usize { + let inner = self.0.get(); + { + let mut policy = inner.policy(); + policy.expire(inner.shared().generation_version()); + } + + inner.remaining_size() + } + + #[getter] + #[inline] + fn getsizeof(&self, py: pyo3::Python) -> Option { + let inner = self.0.get(); + inner.shared().getsizeof().clone_ref(py).into() + } + + /// Returns the number of elements the map can hold without reallocating. + #[inline] + fn capacity(&self) -> usize { + let inner = self.0.get(); + let policy = inner.policy(); + + policy.table().capacity() + } + + /// Returns the number of entries currently in the cache. 
+ #[inline] + fn __len__(&self) -> usize { + let inner = self.0.get(); + let policy = inner.policy(); + + debug_assert!(policy.table().len() == policy.heap().len()); + policy.table().len() + } + + #[inline] + fn __sizeof__(&self) -> usize { + let inner = self.0.get(); + let policy = inner.policy(); + + let table_cap = policy.table().capacity() * 8; + let list_cap = policy.heap().len() * std::mem::size_of::(); + + table_cap + list_cap + } + + #[inline] + fn __bool__(&self) -> bool { + let inner = self.0.get(); + let policy = inner.policy(); + + !policy.table().is_empty() + } + + #[inline] + fn __contains__(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult { + self.contains(py, key) + } + + /// Returns `true` if the cache contains an entry for `key`. + #[inline] + fn contains(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + let inner = self.0.get(); + inner.contains(py, &key) + } + + /// Returns `True` if cache is empty. + #[inline] + fn is_empty(&self) -> bool { + let inner = self.0.get(); + let policy = inner.policy(); + + policy.table().is_empty() + } + + /// Returns `True` when the cumulative size has reached the maxsize limit. + #[inline] + fn is_full(&self) -> bool { + let inner = self.0.get(); + let shared = inner.shared(); + let policy = inner.policy(); + + policy.current_size() >= shared.maxsize() + } + + /// Equals to `self[key] = value`, but returns a value: + /// + /// - If the cache did not have this key present, None is returned. + /// - If the cache did have this key present, the value is updated, + /// and the old value is returned. The key is not updated, though. 
+ #[pyo3(signature=(key, value, ttl=None))] + fn insert( + &self, + py: pyo3::Python, + key: alias::PyObject, + value: alias::PyObject, + ttl: Option, + ) -> pyo3::PyResult> { + let ttl = match ttl { + Some(x) => Some(x.into_seconds_f64(true)?), + None => None, + }; + + let inner = self.0.get(); + let shared = inner.shared(); + let handle = vttlpolicy::ExpiringHandle::new( + py, + shared.getsizeof(), + ttl.map(Into::into), + key, + value, + )?; + + let old_handle = inner.insert(py, handle)?.map(|x| x.into_value()); + Ok(old_handle) + } + + /// Updates the cache with elements from a dictionary or an iterable object of key/value pairs. + #[pyo3(signature=(iterable, ttl=None))] + fn update( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + iterable: alias::PyObject, + ttl: Option, + ) -> pyo3::PyResult<()> { + if std::ptr::eq(slf.as_ptr(), iterable.as_ptr()) { + return Ok(()); + } + + let ttl: Option = match ttl { + Some(x) => Some(x.into_seconds_f64(true)?.into()), + None => None, + }; + + let inner = slf.0.get(); + let shared = inner.shared(); + let getsizeof = shared.getsizeof().clone_ref(py); + + inner.extend( + // iterable object + iterable.into_bound(py), + // transform function + move |key, value| vttlpolicy::ExpiringHandle::new(py, &getsizeof, ttl, key, value), + ) + } + + #[inline] + fn __setitem__( + &self, + py: pyo3::Python, + key: alias::PyObject, + value: alias::PyObject, + ) -> pyo3::PyResult<()> { + self.insert(py, key, value, None)?; + Ok(()) + } + + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn get( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let mut policy = inner.policy(); + + if let Some(x) = policy.get(py, &key)? 
{ + return Ok(x.value().clone_ref(py)); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok(x), + utils::OptionalArgument::Undefined => unsafe { + // SAFETY: None is immortal, so reference counting has no meaning + Ok(pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind()) + }, + } + } + + fn __getitem__( + &self, + py: pyo3::Python, + key: alias::PyObject, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let mut policy = inner.policy(); + + match policy.get(py, &key)? { + Some(x) => Ok(x.value().clone_ref(py)), + None => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined, ttl=None))] + fn setdefault( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ttl: Option, + ) -> pyo3::PyResult { + // 1. Try to get value + // 2. If exists -> return it + // 3. Else -> insert default -> return default + let ttl: Option = match ttl { + Some(x) => Some(x.into_seconds_f64(true)?.into()), + None => None, + }; + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let shared = inner.shared(); + let mut policy = inner.policy(); + + if let Some(x) = policy.get(py, &key)? 
{ + return Ok(x.value().clone_ref(py)); + } + drop(policy); + + let default_object = match default { + utils::OptionalArgument::Defined(x) => x, + utils::OptionalArgument::Undefined => unsafe { + // SAFETY: None is immortal, so reference counting has no meaning + pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind() + }, + }; + + let handle = vttlpolicy::ExpiringHandle::with_precomputed_hash_key( + py, + shared.getsizeof(), + ttl, + key, + default_object.clone_ref(py), + )?; + + inner.insert(py, handle)?; + Ok(default_object) + } + + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn pop( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + + if let Some(x) = inner.remove(py, &key)? { + return Ok(x.into_value()); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok(x), + utils::OptionalArgument::Undefined => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + fn __delitem__(&self, py: pyo3::Python, key: alias::PyObject) -> pyo3::PyResult<()> { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + match inner.remove(py, &key)? { + Some(_) => Ok(()), + None => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + /// Remove and return a (key, value) pair as a 2-tuple. + fn popitem(&self) -> pyo3::PyResult<(alias::PyObject, alias::PyObject)> { + let inner = self.0.get(); + let mut policy = inner.policy(); + + let handle = policy.evict(inner.shared())?; + drop(policy); + + let (key, val) = handle.into_pair(); + Ok((key.into(), val)) + } + + /// Calls the `popitem()` `n` times and returns count of removed items. 
+ #[inline] + fn drain( + &self, + py: pyo3::Python, + n: pyo3::ffi::Py_ssize_t, + ) -> pyo3::PyResult { + let inner = self.0.get(); + inner.drain(py, n) + } + + /// Shrinks the internal allocation as close to the current length as possible. + #[inline] + fn shrink_to_fit(&self) { + let inner = self.0.get(); + let mut policy = inner.policy(); + policy.shrink_to_fit(inner.shared()); + } + + /// Removes all entries from the table and resets the cumulative size to zero. + #[pyo3(signature=(*, reuse=false))] + fn clear(&self, reuse: bool) { + let inner = self.0.get(); + let shared = inner.shared(); + let mut policy = inner.policy(); + + policy.clear(shared); + + if !reuse { + policy.shrink_to_fit(shared); + } + } + + fn __eq__( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + other: pyo3::PyRef<'_, Self>, + ) -> pyo3::PyResult { + if std::ptr::eq(slf.as_ptr(), other.as_ptr()) { + return Ok(true); + } + + let self_inner = slf.0.get(); + let other_inner = other.0.get(); + + let self_policy = self_inner.policy(); + let other_policy = other_inner.policy(); + + self_policy.py_eq( + py, + self_inner.shared(), + &*other_policy, + other_inner.shared(), + ) + } + + fn __ne__( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python, + other: pyo3::PyRef<'_, Self>, + ) -> pyo3::PyResult { + if std::ptr::eq(slf.as_ptr(), other.as_ptr()) { + return Ok(false); + } + + let self_inner = slf.0.get(); + let other_inner = other.0.get(); + + let self_policy = self_inner.policy(); + let other_policy = other_inner.policy(); + + self_policy + .py_eq( + py, + self_inner.shared(), + &*other_policy, + other_inner.shared(), + ) + .map(|x| !x) + } + + fn items(&self, py: pyo3::Python) -> pyo3::PyResult> { + let inner = self.0.get(); + let mut policy = inner.policy(); + + let gv = inner.shared().generation_version(); + let iter = policy.iter(gv); + + let result = PyVTTLCacheItems { + iter: parking_lot::Mutex::new(iter), + gv: gv.clone(), + initial_gv: gv.get(), + }; + pyo3::Py::new(py, result) + } + + 
fn values(&self, py: pyo3::Python) -> pyo3::PyResult> { + let inner = self.0.get(); + let mut policy = inner.policy(); + + let gv = inner.shared().generation_version(); + let iter = policy.iter(gv); + + let result = PyVTTLCacheValues { + iter: parking_lot::Mutex::new(iter), + gv: gv.clone(), + initial_gv: gv.get(), + }; + pyo3::Py::new(py, result) + } + + fn keys(&self, py: pyo3::Python) -> pyo3::PyResult> { + let inner = self.0.get(); + let mut policy = inner.policy(); + + let gv = inner.shared().generation_version(); + let iter = policy.iter(gv); + + let result = PyVTTLCacheKeys { + iter: parking_lot::Mutex::new(iter), + gv: gv.clone(), + initial_gv: gv.get(), + }; + pyo3::Py::new(py, result) + } + + #[inline] + fn __iter__(&self, py: pyo3::Python) -> pyo3::PyResult> { + self.keys(py) + } + + fn copy(&self, py: pyo3::Python) -> pyo3::PyResult> { + let inner = self.0.get(); + let cloned = inner.clone_ref(py); + + let result = Self(onceinit::OnceInit::new(cloned)); + + pyo3::Py::new(py, (result, crate::pyclasses::base::PyBaseCacheImpl)) + } + + #[inline] + fn __copy__(&self, py: pyo3::Python) -> pyo3::PyResult> { + self.copy(py) + } + + fn __repr__(slf: pyo3::PyRef<'_, Self>, py: pyo3::Python) -> String { + let inner = slf.0.get(); + let shared = inner.shared(); + let policy = inner.policy(); + + let now = std::time::SystemTime::now(); + + // We cannot use heap.iter here, because it requires re-sorting + // and this can lead to intrupt iterators. 
+ let iter = unsafe { + policy + .table() + .iter() + .map(|bucket| bucket.as_ref().element()) + .filter(|handle| !handle.is_expired(now)) + .map(|handle| { + ( + // Without `.bind` it returns something like `Py(addr)` + handle.key().as_ref().bind(py), + handle.value().bind(py), + ) + }) + }; + + let items = utils::items_to_str(iter, policy.table().len()).unwrap(); + format!( + "{}[{}/{}]({})", + unsafe { utils::get_type_name(py, slf.as_ptr()) }, + policy.current_size(), + shared.maxsize(), + items + ) + } + + #[inline] + #[pyo3(signature=(*, reuse=false))] + fn expire(&self, reuse: bool) { + let inner = self.0.get(); + let shared = inner.shared(); + let mut policy = inner.policy(); + + policy.expire(shared.generation_version()); + + if !reuse { + policy.shrink_to_fit(shared); + } + } + + // TODO: items_with_expire, get_with_expire, pop_with_expire, popitem_with_expire + + fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { + let inner = self.0.get(); + let policy = inner.policy(); + + for cursor in unsafe { policy.table().iter() } { + let handle = unsafe { cursor.as_ref().element() }; + + visit.call(handle.key().as_ref())?; + visit.call(handle.value())?; + } + Ok(()) + } + + fn __clear__(&self) { + let inner = self.0.get(); + let mut policy = inner.policy(); + policy.clear(inner.shared()); + } +} + +// Implement iterators +macro_rules! implement_iterator { + ( + $( + $name:ident as $pyname:literal + fn ($py:ident, $handle:ident) -> $rt_type:ty { $init:expr } + )+ + ) => { + $( + implement_pyclass! 
{ + [generic, frozen] $name as $pyname { + initial_gv: u32, + gv: utils::GenerationVersion, + iter: parking_lot::Mutex>, + } + } + + #[pyo3::pymethods] + impl $name { + #[inline] + fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { + slf + } + + fn __next__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult<$rt_type> { + if slf.initial_gv != slf.gv.get() { + return Err(new_py_error!( + PyRuntimeError, + "cache size changed during iteration" + )); + } + + let now = std::time::SystemTime::now(); + let mut iter = slf.iter.lock(); + let $py = slf.py(); + + while let Some(x) = iter.next() { + let $handle = unsafe { x.element() }; + if $handle.is_expired(now) { + continue; + } + + return Ok($init); + } + + Err(new_py_error!(PyStopIteration, ())) + } + } + )+ + }; +} +implement_iterator!( + PyVTTLCacheItems as "vttlcache_items" + fn(py, handle) -> (alias::PyObject, alias::PyObject) {{ + let (key, val) = handle.clone_ref(py).into_pair(); + (key.into(), val) + }} + + PyVTTLCacheKeys as "vttlcache_keys" + fn(py, handle) -> alias::PyObject { handle.key().clone_ref(py).into() } + + PyVTTLCacheValues as "vttlcache_values" + fn(py, handle) -> alias::PyObject { handle.value().clone_ref(py) } +); diff --git a/tests/test_impls.py b/tests/test_impls.py index 3b06870..51cc272 100644 --- a/tests/test_impls.py +++ b/tests/test_impls.py @@ -1308,3 +1308,39 @@ def test_sweep_interval(self): assert len(obj) == 3 time.sleep(3.5) assert len(obj) == 0 + + # TODO: more tests for sweep_interval + + +class TestVTTLCache( + mixins.InitializeMixin, + mixins.InsertAndGetMixin, + mixins.PopitemMixin, + mixins.SetDefaultMixin, + mixins.PopAndDeleteMixin, + mixins.UpdateMixin, + mixins.IntrospectionMixin, + mixins.IterationMixin, + mixins.DrainClearShrinkMixin, + mixins.CopyMixin, + mixins.GetSizeOfMixin, + mixins.EdgeCasesMixin, + mixins.IssuesMixin, + mixins.FuzzyMixin, +): + def create_cache( + self, + maxsize: int = 10, + iterable: typing.Any = None, + capacity: int = 0, + getsizeof: 
typing.Any = None, + ) -> cachebox.VTTLCache: + return cachebox.VTTLCache( + maxsize, + iterable, + 100, + capacity=capacity, + getsizeof=getsizeof, + ) + + # TODO: complete vttlcache tests From c686f79e80211aa831ffc3559d9e44e381541281 Mon Sep 17 00:00:00 2001 From: awolverp Date: Wed, 27 May 2026 11:53:58 +0330 Subject: [PATCH 27/60] Fix VTTLCache expiry, and change __repr__ formats --- src/internal/utils.rs | 113 ++++++++++++---- src/policies/ttlpolicy.rs | 27 +--- src/policies/vttlpolicy.rs | 27 +--- src/pyclasses/cache.rs | 3 +- src/pyclasses/fifocache.rs | 3 +- src/pyclasses/lfucache.rs | 3 +- src/pyclasses/lrucache.rs | 3 +- src/pyclasses/rrcache.rs | 3 +- src/pyclasses/ttlcache.rs | 20 ++- src/pyclasses/vttlcache.rs | 25 ++-- tests/test_impls.py | 270 ++++++++++++++++++++++++++++++++++++- 11 files changed, 385 insertions(+), 112 deletions(-) diff --git a/src/internal/utils.rs b/src/internal/utils.rs index 59d3954..092cafa 100644 --- a/src/internal/utils.rs +++ b/src/internal/utils.rs @@ -223,46 +223,105 @@ impl<'a, 'py> pyo3::FromPyObject<'a, 'py> for OptionalArgument { } /// It can use as PyO3 function argument. Accepts Python `float`, `dateime.timedelta`, and `datetime.datetime`. -#[derive(pyo3::FromPyObject)] +#[derive(pyo3::FromPyObject, Debug)] pub enum TimeToLiveArgument { Float(f64), - Timedelta(chrono::TimeDelta), - Datetime(chrono::DateTime), + Timedelta(std::time::Duration), + DatetimeUtc(chrono::DateTime), + DatetimeNaive(chrono::NaiveDateTime), } impl TimeToLiveArgument { - /// Consumes self and returns [`std::time::Duration`]. + /// Consumes self and returns [`ExpiresAt`]. + /// + /// In this method, [`Self::Datetime`] is allowed. 
#[inline(always)] - pub fn into_duration(self, datetime_allowed: bool) -> pyo3::PyResult { - self.into_seconds_f64(datetime_allowed) - .map(std::time::Duration::from_secs_f64) + pub fn into_expires_at(self) -> pyo3::PyResult { + match self { + Self::Float(secs) if secs > 0.0 => Ok(ExpiresAt::Duration( + std::time::Duration::from_secs_f64(secs), + )), + Self::Timedelta(delta) if delta > std::time::Duration::ZERO => { + Ok(ExpiresAt::Duration(delta)) + } + Self::DatetimeUtc(until) if until > chrono::Utc::now() => Ok(ExpiresAt::from(until)), + Self::DatetimeNaive(until) if until > chrono::Local::now().naive_local() => { + Ok(ExpiresAt::from(until)) + } + _ => Err(new_py_error!( + PyValueError, + "time-to-live must be positive and non-zero" + )), + } } + /// Consumes self and returns [`std::time::Duration`]. + /// + /// In this method, [`Self::Datetime`] is not allowed. #[inline(always)] - pub fn into_seconds_f64(self, datetime_allowed: bool) -> pyo3::PyResult { - let seconds = match self { - Self::Float(x) => x, - Self::Timedelta(x) => x.as_seconds_f64(), - Self::Datetime(x) => { - if !datetime_allowed { - return Err(new_py_error!( - PyValueError, - "expected datetime.timedelta or float, got datetime.datetime" - )); - } else { - (chrono::Utc::now() - x).as_seconds_f64() - } - } - }; - - if seconds <= 0.0 { - return Err(new_py_error!( + pub fn into_duration(self) -> pyo3::PyResult { + match self { + Self::Float(secs) if secs > 0.0 => Ok(std::time::Duration::from_secs_f64(secs)), + Self::Timedelta(delta) if delta > std::time::Duration::ZERO => Ok(delta), + Self::DatetimeUtc(_) | Self::DatetimeNaive(_) => Err(new_py_error!( + PyTypeError, + "expected dateime.timedelta or float, got datetime.datetime" + )), + _ => Err(new_py_error!( PyValueError, "time-to-live must be positive and non-zero" - )); + )), } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub enum ExpiresAt { + SystemTime(std::time::SystemTime), + Duration(std::time::Duration), +} + 
+impl From for ExpiresAt { + #[inline] + fn from(value: std::time::Duration) -> Self { + Self::Duration(value) + } +} + +impl From for ExpiresAt { + #[inline] + fn from(value: std::time::SystemTime) -> Self { + Self::SystemTime(value) + } +} + +impl From> for ExpiresAt { + #[inline] + fn from(value: chrono::DateTime) -> Self { + Self::SystemTime(value.into()) + } +} + +impl From for ExpiresAt { + #[inline] + fn from(value: chrono::NaiveDateTime) -> Self { + // Treat naive as local time, consistent with Python's datetime.now() + let utc: chrono::DateTime = value + .and_local_timezone(chrono::Local) + .single() + .map(|dt| dt.with_timezone(&chrono::Utc)) + .unwrap_or_else(|| value.and_utc()); + + Self::SystemTime(utc.into()) + } +} - Ok(seconds) +impl From for std::time::SystemTime { + fn from(value: ExpiresAt) -> Self { + match value { + ExpiresAt::Duration(dur) => std::time::SystemTime::now() + dur, + ExpiresAt::SystemTime(until) => until, + } } } diff --git a/src/policies/ttlpolicy.rs b/src/policies/ttlpolicy.rs index dc21099..adad9e5 100644 --- a/src/policies/ttlpolicy.rs +++ b/src/policies/ttlpolicy.rs @@ -19,29 +19,6 @@ macro_rules! get_handle { }; } -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] -pub enum ExpiresAt { - SystemTime(std::time::SystemTime), - Duration(std::time::Duration), -} - -impl From for ExpiresAt { - #[inline] - fn from(value: std::time::Duration) -> Self { - Self::Duration(value) - } -} - -impl From for std::time::SystemTime { - #[inline] - fn from(value: ExpiresAt) -> Self { - match value { - ExpiresAt::Duration(x) => std::time::SystemTime::now() + x, - ExpiresAt::SystemTime(x) => x, - } - } -} - /// A key-value pair with a precomputed hash and combined size. 
pub struct ExpiringHandle { /// The cache key together with its precomputed hash, avoiding repeated @@ -61,7 +38,7 @@ impl ExpiringHandle { pub fn new( py: pyo3::Python<'_>, getsizeof: &utils::GetsizeofFunction, - expires_at: ExpiresAt, + expires_at: utils::ExpiresAt, key: alias::PyObject, value: alias::PyObject, ) -> pyo3::PyResult { @@ -82,7 +59,7 @@ impl ExpiringHandle { pub fn with_precomputed_hash_key( py: pyo3::Python<'_>, getsizeof: &utils::GetsizeofFunction, - expires_at: ExpiresAt, + expires_at: utils::ExpiresAt, key: utils::PrecomputedHashObject, value: alias::PyObject, ) -> pyo3::PyResult { diff --git a/src/policies/vttlpolicy.rs b/src/policies/vttlpolicy.rs index 9db4042..6429683 100644 --- a/src/policies/vttlpolicy.rs +++ b/src/policies/vttlpolicy.rs @@ -21,29 +21,6 @@ macro_rules! compare_fn { }; } -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] -pub enum ExpiresAt { - SystemTime(std::time::SystemTime), - Duration(std::time::Duration), -} - -impl From for ExpiresAt { - #[inline] - fn from(value: f64) -> Self { - Self::Duration(std::time::Duration::from_secs_f64(value)) - } -} - -impl From for std::time::SystemTime { - #[inline] - fn from(value: ExpiresAt) -> Self { - match value { - ExpiresAt::Duration(x) => std::time::SystemTime::now() + x, - ExpiresAt::SystemTime(x) => x, - } - } -} - /// A key-value pair with a precomputed hash and combined size. 
pub struct ExpiringHandle { /// The cache key together with its precomputed hash, avoiding repeated @@ -63,7 +40,7 @@ impl ExpiringHandle { pub fn new( py: pyo3::Python<'_>, getsizeof: &utils::GetsizeofFunction, - expires_at: Option, + expires_at: Option, key: alias::PyObject, value: alias::PyObject, ) -> pyo3::PyResult { @@ -84,7 +61,7 @@ impl ExpiringHandle { pub fn with_precomputed_hash_key( py: pyo3::Python<'_>, getsizeof: &utils::GetsizeofFunction, - expires_at: Option, + expires_at: Option, key: utils::PrecomputedHashObject, value: alias::PyObject, ) -> pyo3::PyResult { diff --git a/src/pyclasses/cache.rs b/src/pyclasses/cache.rs index 8589b57..cc15c40 100644 --- a/src/pyclasses/cache.rs +++ b/src/pyclasses/cache.rs @@ -570,9 +570,8 @@ impl PyCache { let items = utils::items_to_str(iter, policy.table().len()).unwrap(); format!( - "{}[{}/{}]({})", + "{}[maxsize={}]({})", unsafe { utils::get_type_name(py, slf.as_ptr()) }, - policy.current_size(), shared.maxsize(), items ) diff --git a/src/pyclasses/fifocache.rs b/src/pyclasses/fifocache.rs index f0b1b14..fce4de4 100644 --- a/src/pyclasses/fifocache.rs +++ b/src/pyclasses/fifocache.rs @@ -569,9 +569,8 @@ impl PyFIFOCache { let items = utils::items_to_str(iter, policy.table().len()).unwrap(); format!( - "{}[{}/{}]({})", + "{}[maxsize={}]({})", unsafe { utils::get_type_name(py, slf.as_ptr()) }, - policy.current_size(), shared.maxsize(), items ) diff --git a/src/pyclasses/lfucache.rs b/src/pyclasses/lfucache.rs index 1e188d3..8423819 100644 --- a/src/pyclasses/lfucache.rs +++ b/src/pyclasses/lfucache.rs @@ -617,9 +617,8 @@ impl PyLFUCache { let items = utils::items_to_str(iter, policy.table().len()).unwrap(); format!( - "{}[{}/{}]({})", + "{}[maxsize={}]({})", unsafe { utils::get_type_name(py, slf.as_ptr()) }, - policy.current_size(), shared.maxsize(), items ) diff --git a/src/pyclasses/lrucache.rs b/src/pyclasses/lrucache.rs index bd9235c..11714b8 100644 --- a/src/pyclasses/lrucache.rs +++ 
b/src/pyclasses/lrucache.rs @@ -599,9 +599,8 @@ impl PyLRUCache { let items = utils::items_to_str(iter, policy.table().len()).unwrap(); format!( - "{}[{}/{}]({})", + "{}[maxsize={}]({})", unsafe { utils::get_type_name(py, slf.as_ptr()) }, - policy.current_size(), shared.maxsize(), items ) diff --git a/src/pyclasses/rrcache.rs b/src/pyclasses/rrcache.rs index f4fa6b6..59af05c 100644 --- a/src/pyclasses/rrcache.rs +++ b/src/pyclasses/rrcache.rs @@ -575,9 +575,8 @@ impl PyRRCache { let items = utils::items_to_str(iter, policy.table().len()).unwrap(); format!( - "{}[{}/{}]({})", + "{}[maxsize={}]({})", unsafe { utils::get_type_name(py, slf.as_ptr()) }, - policy.current_size(), shared.maxsize(), items ) diff --git a/src/pyclasses/ttlcache.rs b/src/pyclasses/ttlcache.rs index 29d7e1a..6c3c616 100644 --- a/src/pyclasses/ttlcache.rs +++ b/src/pyclasses/ttlcache.rs @@ -54,23 +54,30 @@ impl PyTTLCache { capacity: usize, getsizeof: Option, ) -> pyo3::PyResult<()> { - let global_ttl = global_ttl.into_duration(false)?; + let global_ttl = global_ttl.into_duration()?; let wrapped = Wrapped::new(ttlpolicy::TTLPolicy::new(capacity), unsafe { - ttlpolicy::Shared::with_ttl(maxsize, getsizeof, Some(global_ttl)) + ttlpolicy::Shared::with_ttl(maxsize, getsizeof, Some(global_ttl.into())) }); // Populate cache if `iterable` passed let extend_result = { if let Some(iterable) = iterable { - let ttl: ttlpolicy::ExpiresAt = wrapped.shared().global_ttl().unwrap().into(); let getsizeof = wrapped.shared().getsizeof().clone_ref(py); let result = wrapped.extend( // iterable object iterable, // transform function - |key, value| ttlpolicy::ExpiringHandle::new(py, &getsizeof, ttl, key, value), + |key, value| { + ttlpolicy::ExpiringHandle::new( + py, + &getsizeof, + global_ttl.into(), + key, + value, + ) + }, ); result } else { @@ -230,7 +237,7 @@ impl PyTTLCache { let inner = slf.0.get(); let shared = inner.shared(); - let ttl: ttlpolicy::ExpiresAt = unsafe { 
shared.global_ttl().unwrap_unchecked().into() }; + let ttl: utils::ExpiresAt = unsafe { shared.global_ttl().unwrap_unchecked().into() }; let getsizeof = shared.getsizeof().clone_ref(py); inner.extend( @@ -574,9 +581,8 @@ impl PyTTLCache { let items = utils::items_to_str(iter, policy.table().len()).unwrap(); format!( - "{}[{}/{}]({})", + "{}[maxsize={}]({})", unsafe { utils::get_type_name(py, slf.as_ptr()) }, - policy.current_size(), shared.maxsize(), items ) diff --git a/src/pyclasses/vttlcache.rs b/src/pyclasses/vttlcache.rs index 7cfdf84..37dffd1 100644 --- a/src/pyclasses/vttlcache.rs +++ b/src/pyclasses/vttlcache.rs @@ -66,8 +66,8 @@ impl PyVTTLCache { // Populate cache if `iterable` passed let extend_result = { if let Some(iterable) = iterable { - let ttl: Option = match ttl { - Some(x) => Some(x.into_seconds_f64(true)?.into()), + let ttl: Option = match ttl { + Some(x) => Some(x.into_expires_at()?), None => None, }; @@ -206,19 +206,13 @@ impl PyVTTLCache { ttl: Option, ) -> pyo3::PyResult> { let ttl = match ttl { - Some(x) => Some(x.into_seconds_f64(true)?), + Some(x) => Some(x.into_expires_at()?), None => None, }; let inner = self.0.get(); let shared = inner.shared(); - let handle = vttlpolicy::ExpiringHandle::new( - py, - shared.getsizeof(), - ttl.map(Into::into), - key, - value, - )?; + let handle = vttlpolicy::ExpiringHandle::new(py, shared.getsizeof(), ttl, key, value)?; let old_handle = inner.insert(py, handle)?.map(|x| x.into_value()); Ok(old_handle) @@ -236,8 +230,8 @@ impl PyVTTLCache { return Ok(()); } - let ttl: Option = match ttl { - Some(x) => Some(x.into_seconds_f64(true)?.into()), + let ttl = match ttl { + Some(x) => Some(x.into_expires_at()?), None => None, }; @@ -319,8 +313,8 @@ impl PyVTTLCache { // 1. Try to get value // 2. If exists -> return it // 3. 
Else -> insert default -> return default - let ttl: Option = match ttl { - Some(x) => Some(x.into_seconds_f64(true)?.into()), + let ttl = match ttl { + Some(x) => Some(x.into_expires_at()?), None => None, }; let key = utils::PrecomputedHashObject::new(py, key)?; @@ -574,9 +568,8 @@ impl PyVTTLCache { let items = utils::items_to_str(iter, policy.table().len()).unwrap(); format!( - "{}[{}/{}]({})", + "{}[maxsize={}]({})", unsafe { utils::get_type_name(py, slf.as_ptr()) }, - policy.current_size(), shared.maxsize(), items ) diff --git a/tests/test_impls.py b/tests/test_impls.py index 51cc272..42dae25 100644 --- a/tests/test_impls.py +++ b/tests/test_impls.py @@ -1,6 +1,6 @@ import time import typing -from datetime import timedelta +from datetime import datetime, timedelta import pytest @@ -1343,4 +1343,270 @@ def create_cache( getsizeof=getsizeof, ) - # TODO: complete vttlcache tests + +class TestVTTLCachePolicy(mixins.BaseMixin): + def create_cache( + self, + maxsize: int = 10, + iterable: typing.Any = None, + capacity: int = 0, + getsizeof: typing.Any = None, + ) -> cachebox.VTTLCache: + return cachebox.VTTLCache( + maxsize, + iterable, + 100, + capacity=capacity, + getsizeof=getsizeof, + ) + + def test_item_accessible_before_ttl(self): + c = self.create_cache() + c.insert("k", "v", ttl=0.5) + assert c["k"] == "v" + + def test_item_expires_after_ttl(self): + c = self.create_cache() + c.insert("k", "v", ttl=0.1) + time.sleep(0.15) + assert "k" not in c + + def test_expired_item_not_returned_by_get(self): + c = self.create_cache() + c.insert("k", "v", ttl=0.1) + time.sleep(0.15) + assert c.get("k") is None + assert c.get("k", "default") == "default" + + def test_expired_item_raises_on_getitem(self): + c = self.create_cache() + c.insert("k", "v", ttl=0.1) + time.sleep(0.15) + with pytest.raises(KeyError): + _ = c["k"] + + def test_no_ttl_item_never_expires(self): + c = self.create_cache() + c.insert("k", "v") # no TTL + time.sleep(0.1) + assert c["k"] == "v" + + # def 
test_expired_item_excluded_from_len(self): + # c = self.create_cache() + # c.insert("a", 1, ttl=0.1) + # c.insert("b", 2) + # time.sleep(0.15) + # assert len(c) == 1 + + def test_expired_key_not_in_contains(self): + c = self.create_cache() + c.insert("k", "v", ttl=0.1) + time.sleep(0.15) + assert not c.contains("k") + assert "k" not in c + + def test_ttl_as_float(self): + c = self.create_cache() + c.insert("k", "v", ttl=0.1) + time.sleep(0.15) + assert "k" not in c + + def test_ttl_as_timedelta(self): + c = self.create_cache() + c.insert("k", "v", ttl=timedelta(milliseconds=100)) + time.sleep(0.15) + assert "k" not in c + + def test_ttl_as_datetime(self): + c = self.create_cache() + expiry = datetime.now() + timedelta(milliseconds=100) + c.insert("k", "v", ttl=expiry) + assert "k" in c + time.sleep(0.15) + assert "k" not in c + + def test_datetime_in_the_past_expires_immediately(self): + c = self.create_cache() + past = datetime.now() - timedelta(seconds=1) + c.insert("k", "v", ttl=past) + assert "k" not in c + + def test_items_have_independent_ttls(self): + c = self.create_cache() + c.insert("short", "s", ttl=0.1) + c.insert("long", "l", ttl=1.0) + time.sleep(0.15) + assert "short" not in c + assert "long" in c + + def test_mixed_ttl_and_no_ttl(self): + c = self.create_cache() + c.insert("expires", "e", ttl=0.1) + c.insert("permanent", "p") + time.sleep(0.15) + assert "expires" not in c + assert "permanent" in c + + def test_multiple_items_expire_independently(self): + c = self.create_cache() + c.insert("a", 1, ttl=0.1) + c.insert("b", 2, ttl=0.2) + c.insert("c", 3, ttl=0.3) + time.sleep(0.15) + assert "a" not in c + assert "b" in c + assert "c" in c + time.sleep(0.1) + assert "b" not in c + assert "c" in c + + def test_reinsertion_resets_ttl(self): + c = self.create_cache() + c.insert("k", "v1", ttl=0.2) + time.sleep(0.1) + c.insert("k", "v2", ttl=0.2) # reset + time.sleep(0.15) + # original TTL would have expired; new one should not + assert "k" in c + assert 
c["k"] == "v2" + + def test_reinsertion_without_ttl_makes_permanent(self): + c = self.create_cache() + c.insert("k", "v1", ttl=0.1) + c.insert("k", "v2") # no TTL — should become permanent + time.sleep(0.15) + assert "k" in c + + def test_setitem_uses_no_ttl(self): + """__setitem__ inserts without TTL; previously TTL'd key should persist.""" + c = self.create_cache() + c.insert("k", "v1", ttl=0.1) + c["k"] = "v2" + time.sleep(0.15) + assert "k" in c + assert c["k"] == "v2" + + def test_update_applies_ttl_to_all_items(self): + c = self.create_cache() + c.update({"a": 1, "b": 2}, ttl=0.1) + time.sleep(0.15) + assert "a" not in c + assert "b" not in c + + def test_update_without_ttl_items_are_permanent(self): + c = self.create_cache() + c.update({"a": 1, "b": 2}) + time.sleep(0.1) + assert "a" in c + assert "b" in c + + def test_update_mixes_with_existing_items(self): + c = self.create_cache() + c.insert("perm", 0) + c.update({"temp": 1}, ttl=0.1) + time.sleep(0.15) + assert "temp" not in c + assert "perm" in c + + def test_setdefault_inserts_with_ttl_when_absent(self): + c = self.create_cache() + c.setdefault("k", "v", ttl=0.1) + assert c["k"] == "v" + time.sleep(0.15) + assert "k" not in c + + def test_setdefault_does_not_update_existing_key(self): + c = self.create_cache() + c.insert("k", "original", ttl=1.0) + c.setdefault("k", "new", ttl=0.1) + time.sleep(0.15) + # should still be there with original TTL + assert c["k"] == "original" + + def test_popitem_removes_soonest_expiring_item(self): + c = self.create_cache() + c.insert("soon", "s", ttl=0.1) + c.insert("later", "l", ttl=10.0) + key, _ = c.popitem() + assert key == "soon" + + def test_popitem_prefers_expiring_over_permanent(self): + c = self.create_cache() + c.insert("perm", "p") + c.insert("temp", "t", ttl=0.5) + key, _ = c.popitem() + assert key == "temp" + + def test_popitem_on_empty_raises(self): + c = self.create_cache() + with pytest.raises(KeyError): + c.popitem() + + def 
test_expire_removes_stale_items(self): + c = self.create_cache() + c.insert("stale", "s", ttl=0.1) + c.insert("fresh", "f", ttl=10.0) + time.sleep(0.15) + c.expire() + assert "stale" not in c + assert "fresh" in c + + def test_expire_does_not_remove_unexpired_items(self): + c = self.create_cache() + c.insert("a", 1, ttl=10.0) + c.insert("b", 2) + c.expire() + assert "a" in c + assert "b" in c + + def test_expire_reuse_retains_capacity(self): + c = self.create_cache() + c.insert("k", "v", ttl=0.1) + cap_before = c.capacity() + time.sleep(0.15) + c.expire(reuse=True) + assert c.capacity() >= cap_before + + def test_soonest_expiring_evicted_when_full(self): + c = self.create_cache(maxsize=2) + c.insert("a", 1, ttl=0.2) + c.insert("b", 2, ttl=10.0) + # inserting a third item must evict "a" (soonest expiry) + c.insert("c", 3, ttl=10.0) + assert "b" in c + assert "c" in c + assert "a" not in c + + def test_expired_items_cleared_on_insert_when_full(self): + c = self.create_cache(maxsize=2) + c.insert("a", 1, ttl=0.1) + c.insert("b", 2, ttl=0.1) + time.sleep(0.15) + # both expired; inserting should succeed + c.insert("c", 3) + assert "c" in c + + def test_keys_excludes_expired(self): + c = self.create_cache() + c.insert("exp", "e", ttl=0.1) + c.insert("live", "l") + time.sleep(0.15) + assert "exp" not in list(c.keys()) + assert "live" in list(c.keys()) + + def test_values_excludes_expired(self): + c = self.create_cache() + c.insert("exp", "expired_val", ttl=0.1) + c.insert("live", "live_val") + time.sleep(0.15) + assert "expired_val" not in list(c.values()) + assert "live_val" in list(c.values()) + + def test_items_excludes_expired(self): + c = self.create_cache() + c.insert("exp", "e", ttl=0.1) + c.insert("live", "l") + time.sleep(0.15) + keys = [k for k, _ in c.items()] + assert "exp" not in keys + assert "live" in keys From 3a91f7095f9680e6403ce00df6919fdaa1ef2db1 Mon Sep 17 00:00:00 2001 From: awolverp Date: Wed, 27 May 2026 12:03:29 +0330 Subject: [PATCH 28/60] 
Support negative `ttl`s in VTTLCache --- src/internal/utils.rs | 57 +++++++++++++++---------------------------- tests/test_impls.py | 9 +++++++ 2 files changed, 28 insertions(+), 38 deletions(-) diff --git a/src/internal/utils.rs b/src/internal/utils.rs index 092cafa..1499193 100644 --- a/src/internal/utils.rs +++ b/src/internal/utils.rs @@ -222,54 +222,35 @@ impl<'a, 'py> pyo3::FromPyObject<'a, 'py> for OptionalArgument { } } -/// It can use as PyO3 function argument. Accepts Python `float`, `dateime.timedelta`, and `datetime.datetime`. #[derive(pyo3::FromPyObject, Debug)] pub enum TimeToLiveArgument { Float(f64), - Timedelta(std::time::Duration), + Timedelta(chrono::TimeDelta), DatetimeUtc(chrono::DateTime), DatetimeNaive(chrono::NaiveDateTime), } impl TimeToLiveArgument { - /// Consumes self and returns [`ExpiresAt`]. - /// - /// In this method, [`Self::Datetime`] is allowed. #[inline(always)] pub fn into_expires_at(self) -> pyo3::PyResult { match self { - Self::Float(secs) if secs > 0.0 => Ok(ExpiresAt::Duration( - std::time::Duration::from_secs_f64(secs), - )), - Self::Timedelta(delta) if delta > std::time::Duration::ZERO => { - Ok(ExpiresAt::Duration(delta)) - } - Self::DatetimeUtc(until) if until > chrono::Utc::now() => Ok(ExpiresAt::from(until)), - Self::DatetimeNaive(until) if until > chrono::Local::now().naive_local() => { - Ok(ExpiresAt::from(until)) - } - _ => Err(new_py_error!( - PyValueError, - "time-to-live must be positive and non-zero" - )), + Self::Float(secs) => Ok(ExpiresAt::Duration(std::time::Duration::from_secs_f64( + secs.max(0.0), + ))), + Self::Timedelta(delta) => Ok(ExpiresAt::from(delta)), + Self::DatetimeUtc(until) => Ok(ExpiresAt::from(until)), + Self::DatetimeNaive(until) => Ok(ExpiresAt::from(until)), } } - /// Consumes self and returns [`std::time::Duration`]. - /// - /// In this method, [`Self::Datetime`] is not allowed. 
#[inline(always)] pub fn into_duration(self) -> pyo3::PyResult { match self { - Self::Float(secs) if secs > 0.0 => Ok(std::time::Duration::from_secs_f64(secs)), - Self::Timedelta(delta) if delta > std::time::Duration::ZERO => Ok(delta), + Self::Float(secs) => Ok(std::time::Duration::from_secs_f64(secs.max(0.0))), + Self::Timedelta(delta) => Ok(delta.to_std().unwrap_or(std::time::Duration::ZERO)), Self::DatetimeUtc(_) | Self::DatetimeNaive(_) => Err(new_py_error!( PyTypeError, - "expected dateime.timedelta or float, got datetime.datetime" - )), - _ => Err(new_py_error!( - PyValueError, - "time-to-live must be positive and non-zero" + "expected datetime.timedelta or float, got datetime.datetime" )), } } @@ -277,8 +258,8 @@ impl TimeToLiveArgument { #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] pub enum ExpiresAt { - SystemTime(std::time::SystemTime), Duration(std::time::Duration), + Instant(chrono::DateTime), } impl From for ExpiresAt { @@ -288,39 +269,39 @@ impl From for ExpiresAt { } } -impl From for ExpiresAt { +impl From for ExpiresAt { #[inline] - fn from(value: std::time::SystemTime) -> Self { - Self::SystemTime(value) + fn from(value: chrono::TimeDelta) -> Self { + // Negative or zero timedelta collapses to ZERO duration (expire immediately) + Self::Duration(value.to_std().unwrap_or(std::time::Duration::ZERO)) } } impl From> for ExpiresAt { #[inline] fn from(value: chrono::DateTime) -> Self { - Self::SystemTime(value.into()) + Self::Instant(value) } } impl From for ExpiresAt { #[inline] fn from(value: chrono::NaiveDateTime) -> Self { - // Treat naive as local time, consistent with Python's datetime.now() let utc: chrono::DateTime = value .and_local_timezone(chrono::Local) .single() .map(|dt| dt.with_timezone(&chrono::Utc)) .unwrap_or_else(|| value.and_utc()); - - Self::SystemTime(utc.into()) + Self::Instant(utc) } } impl From for std::time::SystemTime { + #[inline] fn from(value: ExpiresAt) -> Self { match value { ExpiresAt::Duration(dur) => 
std::time::SystemTime::now() + dur, - ExpiresAt::SystemTime(until) => until, + ExpiresAt::Instant(until) => until.into(), } } } diff --git a/tests/test_impls.py b/tests/test_impls.py index 42dae25..a162e29 100644 --- a/tests/test_impls.py +++ b/tests/test_impls.py @@ -1431,6 +1431,15 @@ def test_datetime_in_the_past_expires_immediately(self): c.insert("k", "v", ttl=past) assert "k" not in c + c.insert("k", "v", ttl=-0.1) + assert "k" not in c + + c.insert("k", "v", ttl=-1) + assert "k" not in c + + c.insert("k", "v", ttl=timedelta(days=-1)) + assert "k" not in c + def test_items_have_independent_ttls(self): c = self.create_cache() c.insert("short", "s", ttl=0.1) From 0fc2863438ab1c33e843c84319c5254dfc7c7416 Mon Sep 17 00:00:00 2001 From: awolverp Date: Wed, 27 May 2026 12:29:38 +0330 Subject: [PATCH 29/60] Complete refactoring VTTLCache --- cachebox/_core.pyi | 64 +++++++++++++++++ src/pyclasses/ttlcache.rs | 7 ++ src/pyclasses/vttlcache.rs | 140 ++++++++++++++++++++++++++++++++++++- tests/test_impls.py | 90 ++++++++++++++++++++++++ 4 files changed, 300 insertions(+), 1 deletion(-) diff --git a/cachebox/_core.pyi b/cachebox/_core.pyi index 1246553..e91c5c5 100644 --- a/cachebox/_core.pyi +++ b/cachebox/_core.pyi @@ -1473,3 +1473,67 @@ class VTTLCache(BaseCacheImpl[KT, VT]): rather than freeing it. Defaults to ``False``. """ ... + + def get_with_expire( + self, + key: KT, + default: typing.Optional[DT] = None, + ) -> typing.Tuple[typing.Union[VT, DT], float | None]: + """ + Retrieves a value along with its remaining TTL. + + Args: + key: The key to look up. + default: Value to return if the key is not found. + + Returns: + A tuple of ``(value, remaining_ttl)`` where ``remaining_ttl`` is + the remaining time in seconds, or ``None`` if the entry never + expires or the key was not found. + """ + ... 
+ + def pop_with_expire( + self, + key: KT, + default: typing.Optional[DT] = None, + ) -> typing.Tuple[typing.Union[VT, DT], float | None]: + """ + Removes a key and returns its value along with its remaining TTL. + + Args: + key: The key to remove. + default: Value to return if the key is not found; without it, ``KeyError`` is raised. + + Returns: + A tuple of ``(value, remaining_ttl)`` where ``remaining_ttl`` is + the remaining time in seconds, or ``None`` if the entry never + expires or ``default`` was returned. + """ + ... + + def popitem_with_expire(self) -> typing.Tuple[KT, VT, float | None]: + """ + Removes and returns the item that would be evicted next, along with its remaining TTL. + + Returns: + A tuple of ``(key, value, remaining_ttl)`` where ``remaining_ttl`` + is the remaining time in seconds, or ``None`` if the item never expires. + + Raises: + KeyError: If the cache is empty. + """ + ... + + def items_with_expire(self) -> typing.Iterable[typing.Tuple[KT, VT, float | None]]: + """ + Returns an ordered iterable of items with their remaining TTL. + + Warning: + Do not modify the cache while iterating. + + Returns: + An iterable of ``(key, value, remaining_ttl)`` tuples in insertion + order, where ``remaining_ttl`` is in seconds. + """ + ... 
diff --git a/src/pyclasses/ttlcache.rs b/src/pyclasses/ttlcache.rs index 6c3c616..b909c58 100644 --- a/src/pyclasses/ttlcache.rs +++ b/src/pyclasses/ttlcache.rs @@ -56,6 +56,13 @@ impl PyTTLCache { ) -> pyo3::PyResult<()> { let global_ttl = global_ttl.into_duration()?; + if global_ttl == std::time::Duration::ZERO { + return Err(new_py_error!( + PyValueError, + "global_ttl must be positive and non-zero" + )); + } + let wrapped = Wrapped::new(ttlpolicy::TTLPolicy::new(capacity), unsafe { ttlpolicy::Shared::with_ttl(maxsize, getsizeof, Some(global_ttl.into())) }); diff --git a/src/pyclasses/vttlcache.rs b/src/pyclasses/vttlcache.rs index 37dffd1..ac90b4d 100644 --- a/src/pyclasses/vttlcache.rs +++ b/src/pyclasses/vttlcache.rs @@ -1,3 +1,5 @@ +use pyo3::IntoPyObjectExt; + use crate::internal::alias; use crate::internal::lazyheap; use crate::internal::onceinit; @@ -589,7 +591,125 @@ impl PyVTTLCache { } } - // TODO: items_with_expire, get_with_expire, pop_with_expire, popitem_with_expire + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn get_with_expire( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult<(alias::PyObject, alias::PyObject)> { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + let mut policy = inner.policy(); + + if let Some(handle) = policy.get(py, &key)? { + let dur = match handle.expires_at() { + Some(x) => { + let secs = x + .duration_since(std::time::SystemTime::now()) + .unwrap_or_default() + .as_secs_f64(); + + secs.into_py_any(py)? 
+ } + None => py.None(), + }; + + return Ok((handle.value().clone_ref(py), dur)); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok((x, py.None())), + utils::OptionalArgument::Undefined => unsafe { + // SAFETY: None is immortal, so reference counting has no meaning + Ok(( + pyo3::Bound::from_owned_ptr(py, pyo3::ffi::Py_None()).unbind(), + py.None(), + )) + }, + } + } + + #[pyo3(signature = (key, default=utils::OptionalArgument::Undefined))] + fn pop_with_expire( + &self, + py: pyo3::Python, + key: alias::PyObject, + default: utils::OptionalArgument, + ) -> pyo3::PyResult<(alias::PyObject, alias::PyObject)> { + let key = utils::PrecomputedHashObject::new(py, key)?; + + let inner = self.0.get(); + + if let Some(handle) = inner.remove(py, &key)? { + let dur = match handle.expires_at() { + Some(x) => { + let secs = x + .duration_since(std::time::SystemTime::now()) + .unwrap_or_default() + .as_secs_f64(); + + secs.into_py_any(py)? + } + None => py.None(), + }; + + return Ok((handle.into_value(), dur)); + } + + match default { + utils::OptionalArgument::Defined(x) => Ok((x, py.None())), + utils::OptionalArgument::Undefined => Err(new_py_error!( + PyKeyError, + Into::::into(key) + )), + } + } + + fn popitem_with_expire( + &self, + py: pyo3::Python, + ) -> pyo3::PyResult<(alias::PyObject, alias::PyObject, alias::PyObject)> { + let inner = self.0.get(); + let mut policy = inner.policy(); + + let handle = policy.evict(inner.shared())?; + drop(policy); + + let dur = match handle.expires_at() { + Some(x) => { + let secs = x + .duration_since(std::time::SystemTime::now()) + .unwrap_or_default() + .as_secs_f64(); + + secs.into_py_any(py)? 
+ } + None => py.None(), + }; + + let (key, val) = handle.into_pair(); + Ok((key.into(), val, dur)) + } + + fn items_with_expire( + &self, + py: pyo3::Python, + ) -> pyo3::PyResult> { + let inner = self.0.get(); + let mut policy = inner.policy(); + + let gv = inner.shared().generation_version(); + let iter = policy.iter(gv); + + let result = PyVTTLCacheItemsWithExpire { + iter: parking_lot::Mutex::new(iter), + gv: gv.clone(), + initial_gv: gv.get(), + }; + pyo3::Py::new(py, result) + } fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { let inner = self.0.get(); @@ -669,6 +789,24 @@ implement_iterator!( (key.into(), val) }} + PyVTTLCacheItemsWithExpire as "vttlcache_items_with_expire" + fn(py, handle) -> (alias::PyObject, alias::PyObject, alias::PyObject) {{ + let dur = match handle.expires_at() { + Some(x) => { + let secs = x + .duration_since(std::time::SystemTime::now()) + .unwrap_or_default() + .as_secs_f64(); + + secs.into_py_any(py)? + } + None => py.None(), + }; + + let (key, val) = handle.clone_ref(py).into_pair(); + (key.into(), val, dur) + }} + PyVTTLCacheKeys as "vttlcache_keys" fn(py, handle) -> alias::PyObject { handle.key().clone_ref(py).into() } diff --git a/tests/test_impls.py b/tests/test_impls.py index a162e29..07c4a88 100644 --- a/tests/test_impls.py +++ b/tests/test_impls.py @@ -1619,3 +1619,93 @@ def test_items_excludes_expired(self): keys = [k for k, _ in c.items()] assert "exp" not in keys assert "live" in keys + + def test_get_with_expire(self): + obj = self.create_cache(2) + + obj.insert(1, 1, 10) + time.sleep(0.1) + value, dur = obj.get_with_expire(1) + assert 1 == value + assert isinstance(dur, float) and 10 > dur > 9, ( + "10 > dur > 9 failed [dur: %f]" % dur + ) + + obj.insert(1, 1, None) + time.sleep(0.1) + value, dur = obj.get_with_expire(1) + assert 1 == value + assert dur is None, "dur is None failed [dur: {}]".format(dur) + + value, dur = obj.get_with_expire("no-exists") + assert value is None + 
assert dur is None + + value, dur = obj.get_with_expire("no-exists", "value") + assert "value" == value + assert dur is None + + def test_pop_with_expire(self): + obj = self.create_cache(2) + + obj.insert(1, 1, 10) + time.sleep(0.1) + value, dur = obj.pop_with_expire(1) + assert 1 == value + assert isinstance(dur, float) and 10 > dur > 9, ( + "10 > dur > 9 failed [dur: %f]" % dur + ) + + obj.insert(1, 1, None) + time.sleep(0.1) + value, dur = obj.pop_with_expire(1) + assert 1 == value + assert dur is None, "dur is None failed [dur: {}]".format(dur) + + value, dur = obj.pop_with_expire("no-exists", None) + assert value is None + assert dur is None + + value, dur = obj.pop_with_expire("no-exists", "value") + assert "value" == value + assert dur is None + + def test_popitem_with_expire(self): + obj = self.create_cache(2) + + obj.insert(1, 1, 10) + obj.insert(2, 2, 20) + time.sleep(0.1) + key, value, dur = obj.popitem_with_expire() + assert (1, 1) == (key, value) + assert isinstance(dur, float) and 10 > dur > 9, ( + "10 > dur > 9 failed [dur: %f]" % dur + ) + + key, value, dur = obj.popitem_with_expire() + assert (2, 2) == (key, value) + assert isinstance(dur, float) and 20 > dur > 19, ( + "20 > dur > 19 failed [dur: %f]" % dur + ) + + with pytest.raises(KeyError): + obj.popitem_with_expire() + + def test_items_with_expire(self): + # no need to test completely items_with_expire + # because it's tested in test_iterators + obj = self.create_cache(10, {1: 2, 3: 4}) + for key, val, ttl in obj.items_with_expire(): + assert key in obj + assert val == obj[key] + assert isinstance(ttl, float) + + def test_sweep_interval(self): + obj = cachebox.VTTLCache(10, {1: 1, 2: 2, 3: 3}, 3, sweep_interval=3) + + # __len__ doesn't call expire itself + assert len(obj) == 3 + time.sleep(3.5) + assert len(obj) == 0 + + # TODO: more tests for sweep_interval From b0f3e593b81803a61d6ab81b44fcf239eef673e2 Mon Sep 17 00:00:00 2001 From: awolverp Date: Wed, 27 May 2026 12:38:00 +0330 Subject: 
[PATCH 30/60] Update tests --- tests/mixins.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/mixins.py b/tests/mixins.py index 7dabab9..8e90319 100644 --- a/tests/mixins.py +++ b/tests/mixins.py @@ -753,6 +753,15 @@ def test_fuzzy_copy_is_independent_of_original( c2.insert(new_key, new_value) assert not c.contains(new_key) + @given(key=hashable_keys, value=any_value) + def test_fuzzy_chain_methods(self, key, value): + c = self.create_cache(maxsize=0) + assert c.insert(key, value) is None + assert c.setdefault(key, value) == value + assert c.get(key) == value + assert c[key] == value + assert c.pop(key) == value + class BenchmarkMixin(BaseMixin): @pytest.fixture(autouse=True) From 7827cafafe82e7f7334b7e8928eb9c2f12188a97 Mon Sep 17 00:00:00 2001 From: awolverp Date: Wed, 27 May 2026 17:55:16 +0330 Subject: [PATCH 31/60] Add tests for sweep_intervals, improve performance of __eq/ne__ --- src/policies/fifopolicy.rs | 39 +++--- src/policies/lfupolicy.rs | 33 ++--- src/policies/lrupolicy.rs | 39 +++--- src/policies/nopolicy.rs | 27 ++--- src/policies/rrpolicy.rs | 27 ++--- src/policies/ttlpolicy.rs | 46 +++---- src/policies/vttlpolicy.rs | 37 +++--- tests/mixins.py | 242 ++++++++++++++++++++++++++++++++++++- tests/test_impls.py | 115 +++++++++--------- 9 files changed, 396 insertions(+), 209 deletions(-) diff --git a/src/policies/fifopolicy.rs b/src/policies/fifopolicy.rs index 6c333f2..c26300a 100644 --- a/src/policies/fifopolicy.rs +++ b/src/policies/fifopolicy.rs @@ -344,34 +344,28 @@ impl PolicyExt for FIFOPolicy { } let mut error = None; - let result = unsafe { - let mut iterator = self.table.iter().map(|x| x.as_ref()); - - iterator.all(|index_1| { - let handle_1 = get_handle!(&self, *index_1); - let result = other.table.get(handle_1.key().hash(), |index| { - handle_1.key().py_eq(py, get_handle!(&other, *index).key()) - }); - - match result { + let result = unsafe { + self.table.iter().all(|x| { + let handle = get_handle!(&self, *x.as_ref()); + 
let key = handle.key(); + + match other + .table + .get(key.hash(), |i| key.py_eq(py, get_handle!(&other, *i).key())) + { Err(e) => { error = Some(e); - // Return false to break the `.all` loop false } Ok(None) => false, - Ok(Some(index_2)) => { - let handle_2 = get_handle!(&other, *index_2); - - let value_1 = handle_1.value(); - let value_2 = handle_2.value(); - - match utils::pyobject_equal(py, value_1.as_ptr(), value_2.as_ptr()) { - Ok(result) => result, + Ok(Some(i)) => { + let v1 = handle.value(); + let v2 = get_handle!(&other, *i).value(); + match utils::pyobject_equal(py, v1.as_ptr(), v2.as_ptr()) { + Ok(eq) => eq, Err(e) => { error = Some(e); - // Return false to break the `.all` loop false } } @@ -380,10 +374,7 @@ impl PolicyExt for FIFOPolicy { }) }; - if let Some(error) = error { - return Err(error); - } - Ok(result) + error.map_or(Ok(result), Err) } fn clone_ref(&mut self, py: pyo3::Python<'_>) -> Self { diff --git a/src/policies/lfupolicy.rs b/src/policies/lfupolicy.rs index 2f884c8..cac2555 100644 --- a/src/policies/lfupolicy.rs +++ b/src/policies/lfupolicy.rs @@ -416,35 +416,31 @@ impl PolicyExt for LFUPolicy { } let mut error = None; - let result = unsafe { - let mut iterator = self.table.iter().map(|x| x.as_ref()); - iterator.all(|cursor_1| { - let handle_1 = cursor_1.element(); + let result = unsafe { + self.table.iter().all(|x| { + let handle = x.as_ref().element(); - let result = other.table.get(handle_1.key().hash(), |cursor| { - handle_1.key().py_eq(py, cursor.element().key()) - }); + let key = handle.key(); - match result { + match other + .table + .get(key.hash(), |c| key.py_eq(py, c.element().key())) + { Err(e) => { error = Some(e); - // Return false to break the `.all` loop false } Ok(None) => false, - Ok(Some(cursor_2)) => { - let handle_2 = cursor_2.element(); - + Ok(Some(cursor)) => { match utils::pyobject_equal( py, - handle_1.value.as_ptr(), - handle_2.value.as_ptr(), + handle.value.as_ptr(), + cursor.element().value.as_ptr(), ) { - 
Ok(result) => result, + Ok(eq) => eq, Err(e) => { error = Some(e); - // Return false to break the `.all` loop false } } @@ -453,10 +449,7 @@ impl PolicyExt for LFUPolicy { }) }; - if let Some(error) = error { - return Err(error); - } - Ok(result) + error.map_or(Ok(result), Err) } fn clone_ref(&mut self, py: pyo3::Python) -> Self { diff --git a/src/policies/lrupolicy.rs b/src/policies/lrupolicy.rs index 0d2b2ba..e520678 100644 --- a/src/policies/lrupolicy.rs +++ b/src/policies/lrupolicy.rs @@ -259,34 +259,28 @@ impl PolicyExt for LRUPolicy { } let mut error = None; - let result = unsafe { - let mut iterator = self.table.iter().map(|x| x.as_ref()); - - iterator.all(|cursor_1| { - let handle_1 = cursor_1.element(); - let result = other.table.get(handle_1.key().hash(), |cursor| { - handle_1.key().py_eq(py, cursor.element().key()) - }); - - match result { + let result = unsafe { + self.table.iter().all(|x| { + let handle = x.as_ref().element(); + let key = handle.key(); + + match other + .table + .get(key.hash(), |c| key.py_eq(py, c.element().key())) + { Err(e) => { error = Some(e); - // Return false to break the `.all` loop false } Ok(None) => false, - Ok(Some(cursor_2)) => { - let handle_2 = cursor_2.element(); - - let value_1 = handle_1.value(); - let value_2 = handle_2.value(); - - match utils::pyobject_equal(py, value_1.as_ptr(), value_2.as_ptr()) { - Ok(result) => result, + Ok(Some(cursor)) => { + let v1 = handle.value(); + let v2 = cursor.element().value(); + match utils::pyobject_equal(py, v1.as_ptr(), v2.as_ptr()) { + Ok(eq) => eq, Err(e) => { error = Some(e); - // Return false to break the `.all` loop false } } @@ -295,10 +289,7 @@ impl PolicyExt for LRUPolicy { }) }; - if let Some(error) = error { - return Err(error); - } - Ok(result) + error.map_or(Ok(result), Err) } fn clone_ref(&mut self, py: pyo3::Python<'_>) -> Self { diff --git a/src/policies/nopolicy.rs b/src/policies/nopolicy.rs index 08a2ab2..ad8ec27 100644 --- a/src/policies/nopolicy.rs +++ 
b/src/policies/nopolicy.rs @@ -197,30 +197,22 @@ impl traits::PolicyExt for NoPolicy { } let mut error = None; - let result = unsafe { - let mut iterator = self.table.iter().map(|x| x.as_ref()); - iterator.all(|handle_1| { - let result = other - .table - .get(handle_1.key().hash(), |x| handle_1.key().py_eq(py, x.key())); + let result = unsafe { + self.table.iter().map(|x| x.as_ref()).all(|h1| { + let key = h1.key(); - match result { + match other.table.get(key.hash(), |x| key.py_eq(py, x.key())) { Err(e) => { error = Some(e); - // Return false to break the `.all` loop false } Ok(None) => false, - Ok(Some(handle_2)) => { - let value_1 = handle_1.value(); - let value_2 = handle_2.value(); - - match utils::pyobject_equal(py, value_1.as_ptr(), value_2.as_ptr()) { - Ok(result) => result, + Ok(Some(h2)) => { + match utils::pyobject_equal(py, h1.value().as_ptr(), h2.value().as_ptr()) { + Ok(eq) => eq, Err(e) => { error = Some(e); - // Return false to break the `.all` loop false } } @@ -229,10 +221,7 @@ impl traits::PolicyExt for NoPolicy { }) }; - if let Some(error) = error { - return Err(error); - } - Ok(result) + error.map_or(Ok(result), Err) } fn clone_ref(&mut self, py: pyo3::Python<'_>) -> Self { diff --git a/src/policies/rrpolicy.rs b/src/policies/rrpolicy.rs index ab211ba..0c56009 100644 --- a/src/policies/rrpolicy.rs +++ b/src/policies/rrpolicy.rs @@ -208,30 +208,22 @@ impl PolicyExt for RRPolicy { } let mut error = None; - let result = unsafe { - let mut iterator = self.table.iter().map(|x| x.as_ref()); - iterator.all(|handle_1| { - let result = other - .table - .get(handle_1.key().hash(), |x| handle_1.key().py_eq(py, x.key())); + let result = unsafe { + self.table.iter().map(|x| x.as_ref()).all(|h1| { + let key = h1.key(); - match result { + match other.table.get(key.hash(), |x| key.py_eq(py, x.key())) { Err(e) => { error = Some(e); - // Return false to break the `.all` loop false } Ok(None) => false, - Ok(Some(handle_2)) => { - let value_1 = handle_1.value(); - 
let value_2 = handle_2.value(); - - match utils::pyobject_equal(py, value_1.as_ptr(), value_2.as_ptr()) { - Ok(result) => result, + Ok(Some(h2)) => { + match utils::pyobject_equal(py, h1.value().as_ptr(), h2.value().as_ptr()) { + Ok(eq) => eq, Err(e) => { error = Some(e); - // Return false to break the `.all` loop false } } @@ -240,10 +232,7 @@ impl PolicyExt for RRPolicy { }) }; - if let Some(error) = error { - return Err(error); - } - Ok(result) + error.map_or(Ok(result), Err) } fn clone_ref(&mut self, py: pyo3::Python<'_>) -> Self { diff --git a/src/policies/ttlpolicy.rs b/src/policies/ttlpolicy.rs index adad9e5..f51fb86 100644 --- a/src/policies/ttlpolicy.rs +++ b/src/policies/ttlpolicy.rs @@ -460,7 +460,6 @@ impl PolicyExt for TTLPolicy { self.front_offset = 0; } - // TODO: considering expired handles fn py_eq( &self, py: pyo3::Python, @@ -468,42 +467,38 @@ impl PolicyExt for TTLPolicy { other: &Self, other_shared: &Self::Shared, ) -> pyo3::PyResult { - if shared.maxsize() != other_shared.maxsize() - || shared.global_ttl() != other_shared.global_ttl() - || self.table.len() != other.table.len() - { + if shared.maxsize() != other_shared.maxsize() || self.table.len() != other.table.len() { return Ok(false); } let mut error = None; - let result = unsafe { - let mut iterator = self.table.iter().map(|x| x.as_ref()); + let now = std::time::SystemTime::now(); - iterator.all(|index_1| { - let handle_1 = get_handle!(&self, *index_1); + let result = unsafe { + self.table.iter().all(|x| { + let handle = get_handle!(&self, *x.as_ref()); + if handle.is_expired(now) { + return true; + } - let result = other.table.get(handle_1.key().hash(), |index| { - handle_1.key().py_eq(py, get_handle!(&other, *index).key()) - }); + let key = handle.key(); - match result { + match other + .table + .get(key.hash(), |i| key.py_eq(py, get_handle!(&other, *i).key())) + { Err(e) => { error = Some(e); - // Return false to break the `.all` loop false } Ok(None) => false, - Ok(Some(index_2)) => { 
- let handle_2 = get_handle!(&other, *index_2); - - let value_1 = handle_1.value(); - let value_2 = handle_2.value(); - - match utils::pyobject_equal(py, value_1.as_ptr(), value_2.as_ptr()) { - Ok(result) => result, + Ok(Some(i)) => { + let v1 = handle.value(); + let v2 = get_handle!(&other, *i).value(); + match utils::pyobject_equal(py, v1.as_ptr(), v2.as_ptr()) { + Ok(eq) => eq, Err(e) => { error = Some(e); - // Return false to break the `.all` loop false } } @@ -512,10 +507,7 @@ impl PolicyExt for TTLPolicy { }) }; - if let Some(error) = error { - return Err(error); - } - Ok(result) + error.map_or(Ok(result), Err) } fn clone_ref(&mut self, py: pyo3::Python<'_>) -> Self { diff --git a/src/policies/vttlpolicy.rs b/src/policies/vttlpolicy.rs index 6429683..d4aa7f4 100644 --- a/src/policies/vttlpolicy.rs +++ b/src/policies/vttlpolicy.rs @@ -403,7 +403,6 @@ impl PolicyExt for VTTLPolicy { self.heap.shrink_to_fit(); } - // TODO: considering expired handles fn py_eq( &self, py: pyo3::Python, @@ -416,35 +415,36 @@ impl PolicyExt for VTTLPolicy { } let mut error = None; + let now = std::time::SystemTime::now(); + let result = unsafe { - let mut iterator = self.table.iter().map(|x| x.as_ref()); + self.table.iter().all(|x| { + let handle = x.as_ref().element(); - iterator.all(|cursor_1| { - let handle_1 = cursor_1.element(); + if handle.is_expired(now) { + return true; + } - let result = other.table.get(handle_1.key().hash(), |cursor| { - handle_1.key().py_eq(py, cursor.element().key()) - }); + let key = handle.key(); - match result { + match other + .table + .get(key.hash(), |c| key.py_eq(py, c.element().key())) + { Err(e) => { error = Some(e); - // Return false to break the `.all` loop false } Ok(None) => false, - Ok(Some(cursor_2)) => { - let handle_2 = cursor_2.element(); - + Ok(Some(cursor)) => { match utils::pyobject_equal( py, - handle_1.value.as_ptr(), - handle_2.value.as_ptr(), + handle.value.as_ptr(), + cursor.element().value.as_ptr(), ) { - Ok(result) => result, 
+ Ok(eq) => eq, Err(e) => { error = Some(e); - // Return false to break the `.all` loop false } } @@ -453,10 +453,7 @@ impl PolicyExt for VTTLPolicy { }) }; - if let Some(error) = error { - return Err(error); - } - Ok(result) + error.map_or(Ok(result), Err) } fn clone_ref(&mut self, py: pyo3::Python) -> Self { diff --git a/tests/mixins.py b/tests/mixins.py index 8e90319..673e139 100644 --- a/tests/mixins.py +++ b/tests/mixins.py @@ -1,6 +1,10 @@ import dataclasses import sys +import threading +import time import typing +from datetime import timedelta +from unittest.mock import patch import pytest from hypothesis import assume, given @@ -372,7 +376,75 @@ def test_iter_yields_keys(self): cache.update({"x": 10, "y": 20}) assert set(iter(cache)) == {"x", "y"} - # TODO: test generation version + def test_generation_version_on_remove(self): + cache = self.create_cache(10, {i: i for i in range(10)}) + + with pytest.raises(RuntimeError): + for _ in cache: + del cache[9] + + with pytest.raises(RuntimeError): + for _ in cache.values(): + del cache[8] + + with pytest.raises(RuntimeError): + for _ in cache.items(): + del cache[7] + + for _ in cache: + # It should not increment the generation version + # because the key doesn't exist + cache.pop("hello", None) + + def test_generation_version_on_insert(self): + cache = self.create_cache(10, {i: i for i in range(3)}) + + with pytest.raises(RuntimeError): + for _ in cache: + cache.insert("A", 1) + + with pytest.raises(RuntimeError): + for _ in cache.values(): + cache.insert("B", 1) + + with pytest.raises(RuntimeError): + for _ in cache.items(): + cache.insert("C", 1) + + if isinstance(cache, cachebox.LRUCache): + return + + for i in cache: + # It should not increment the generation version + # in replacing value + cache.insert(i, "hello") + + def test_generation_version_on_shrink_to_fit(self): + cache = self.create_cache(10, {i: i for i in range(3)}) + + if isinstance(cache, cachebox.LRUCache): + pytest.skip("LRUCache is 
excluded") + + with pytest.raises(RuntimeError): + for _ in cache: + cache.shrink_to_fit() + + def test_generation_version_on_clear(self): + cache = self.create_cache(10, {i: i for i in range(3)}) + + with pytest.raises(RuntimeError): + for _ in cache: + cache.clear() + + def test_generation_version_on_popitem(self): + cache = self.create_cache(10, {i: i for i in range(3)}) + + if isinstance(cache, cachebox.Cache): + pytest.skip("Cache doesn't implemented popitem") + + with pytest.raises(RuntimeError): + for _ in cache: + cache.popitem() class DrainClearShrinkMixin(BaseMixin): @@ -613,6 +685,174 @@ def __hash__(self) -> int: cache.get(EQ(val=i)) +class SweepIntervalMixin(BaseMixin): + def _create_sweep_cache( + self, *args, **kwds + ) -> cachebox.TTLCache | cachebox.VTTLCache: + return typing.cast( + cachebox.TTLCache | cachebox.VTTLCache, + self.create_cache(*args, **kwds), + ) + + def test_none_by_default_no_thread(self): + cache = self._create_sweep_cache(maxsize=10) + assert cache.sweep_interval is None + assert cache._thread is None + assert cache._thread_is_running is False + + def test_numeric_sweep_interval_starts_thread(self): + cache = self._create_sweep_cache(maxsize=10, sweep_interval=1) + + try: + assert cache._thread is not None + assert cache._thread.is_alive() + assert cache._thread_is_running is True + finally: + cache.stop_sweeper() + + def test_timedelta_sweep_interval_starts_thread(self): + cache = self._create_sweep_cache( + maxsize=10, sweep_interval=timedelta(seconds=1) + ) + try: + assert cache._thread is not None + assert cache._thread.is_alive() + finally: + cache.stop_sweeper() + + def test_timedelta_converted_to_seconds(self): + cache = self._create_sweep_cache( + maxsize=10, sweep_interval=timedelta(seconds=5) + ) + try: + assert cache.sweep_interval == 5.0 + finally: + cache.stop_sweeper() + + def test_sweep_interval_stored_as_float(self): + cache = self._create_sweep_cache(maxsize=10, sweep_interval=2) + try: + assert 
cache.sweep_interval == 2.0 + finally: + cache.stop_sweeper() + + def test_sweep_interval_below_1_raises(self): + with pytest.raises( + ValueError, match="sweep_interval must be more than 1 seconds" + ): + self._create_sweep_cache(maxsize=10, sweep_interval=0.5) + + def test_sweep_interval_zero_raises(self): + with pytest.raises(ValueError): + self._create_sweep_cache(maxsize=10, sweep_interval=0) + + def test_sweep_interval_negative_raises(self): + with pytest.raises(ValueError): + self._create_sweep_cache(maxsize=10, sweep_interval=-1) + + def test_sweep_interval_exactly_1_is_valid(self): + cache = self._create_sweep_cache(maxsize=10, sweep_interval=1) + try: + assert cache.sweep_interval == 1.0 + finally: + cache.stop_sweeper() + + def test_thread_is_daemon(self): + cache = self._create_sweep_cache(maxsize=10, sweep_interval=1) + try: + assert cache._thread.daemon is True # type: ignore + finally: + cache.stop_sweeper() + + def test_stop_sets_flag_false(self): + cache = self._create_sweep_cache(maxsize=10, sweep_interval=1) + assert cache._thread_is_running is True + cache.stop_sweeper() + assert cache._thread_is_running is False + + def test_stop_on_cache_without_sweeper_is_safe(self): + cache = self._create_sweep_cache(maxsize=10) + cache.stop_sweeper() # should not raise + assert cache._thread_is_running is False + + def test_stop_idempotent(self): + cache = self._create_sweep_cache(maxsize=10, sweep_interval=1) + cache.stop_sweeper() + cache.stop_sweeper() # second call must not raise + assert cache._thread_is_running is False + + def test_thread_eventually_stops_after_signal(self): + cache = self._create_sweep_cache(maxsize=10, sweep_interval=1) + cache.stop_sweeper() + cache._thread.join(timeout=3) # type: ignore + assert not cache._thread.is_alive() # type: ignore + + def test_expire_called_periodically(self): + """expire() should be invoked by the background thread on schedule.""" + cache = self._create_sweep_cache(maxsize=10, sweep_interval=1) + try: + 
with patch.object(cache, "expire", wraps=cache.expire) as mock_expire: + time.sleep(2.5) + assert mock_expire.call_count >= 2 + finally: + cache.stop_sweeper() + + def test_expired_items_removed_by_sweeper(self): + """Items with elapsed TTLs should be absent after a sweep cycle.""" + cache = self._create_sweep_cache(maxsize=50, sweep_interval=1) + try: + if isinstance(cache, cachebox.TTLCache): + cache.insert("b", 2) + else: + cache.insert("b", 2, 0.1) + + time.sleep(2) + assert "b" not in cache + finally: + cache.stop_sweeper() + + def test_concurrent_writes_with_sweeper_running(self): + """Concurrent inserts alongside the sweeper must not raise.""" + cache = self._create_sweep_cache(maxsize=100, sweep_interval=1) + errors = [] + + def writer(start): + try: + for i in range(start, start + 50): + cache[f"k{i}"] = i + time.sleep(0.01) + except Exception as exc: + errors.append(exc) + + threads = [threading.Thread(target=writer, args=(i * 50,)) for i in range(4)] + try: + for t in threads: + t.start() + for t in threads: + t.join() + assert errors == [], f"Unexpected errors: {errors}" + finally: + cache.stop_sweeper() + + def test_stop_sweeper_while_sleeping(self): + """stop_sweeper() called mid-sleep should clear the flag without hanging.""" + cache = self._create_sweep_cache(maxsize=10, sweep_interval=30) # long interval + thread = cache._thread + cache.stop_sweeper() + assert cache._thread_is_running is False + assert thread is not None + + def test_del_stops_sweeper(self): + cache = self._create_sweep_cache(maxsize=10, sweep_interval=1) + assert cache._thread_is_running is True + cache.__del__() + assert cache._thread_is_running is False + + def test_del_without_sweeper_is_safe(self): + cache = self._create_sweep_cache(maxsize=10) + cache.__del__() # must not raise + + class FuzzyMixin(BaseMixin): @given(key=hashable_keys, value=any_value) def test_fuzzy_insert_then_get_returns_same_value(self, key, value): diff --git a/tests/test_impls.py b/tests/test_impls.py 
index 07c4a88..31a899e 100644 --- a/tests/test_impls.py +++ b/tests/test_impls.py @@ -880,17 +880,17 @@ def test_clear_resets_all_frequencies(self): c["d"] = 4 assert "a" not in c - def test_insert_returns_none_for_new_key(self): - c = self.create_cache(5) - result = c.insert("x", 42) - assert result is None + def test_generation_version_on_least_frequently_used(self): + c = self.create_cache(5, {i: i for i in range(5)}) - def test_insert_returns_old_value_for_existing_key(self): - c = self.create_cache(5) - c["x"] = 1 - old = c.insert("x", 99) - assert old == 1 - assert c["x"] == 99 + self._hit(c, 1, 5) + self._hit(c, 2, 3) + self._hit(c, 4, 10) + + # calling __iter__ causes sorts lazyheap + # so least_frequently_used shouldn't intrupt iteration + for _ in c: + c.least_frequently_used() class TestTTLCache( @@ -925,30 +925,40 @@ def create_cache( ) -class TestTTLCachePolicy: +class TestTTLCachePolicy(mixins.SweepIntervalMixin): def create_cache( self, maxsize: int = 10, - ttl: float | timedelta = 10, iterable: typing.Any = None, + capacity: int = 0, + getsizeof: typing.Any = None, + global_ttl: float | timedelta = 1, + sweep_interval: float | timedelta | None = None, ) -> cachebox.TTLCache: - return cachebox.TTLCache(maxsize, ttl, iterable) + return cachebox.TTLCache( + maxsize, + global_ttl, + iterable, + capacity=capacity, + getsizeof=getsizeof, + sweep_interval=sweep_interval, + ) def test_global_ttl_property(self): - c = self.create_cache(10, 5) + c = self.create_cache(10, global_ttl=5) assert c.global_ttl == 5 - c = self.create_cache(10, timedelta(seconds=5)) + c = self.create_cache(10, global_ttl=timedelta(seconds=5)) assert c.global_ttl == 5 with pytest.raises(ValueError): - c = self.create_cache(10, 0) + c = self.create_cache(10, global_ttl=0) with pytest.raises(ValueError): - c = self.create_cache(10, -1) + c = self.create_cache(10, global_ttl=-1) def test_global_ttl_with_iterable(self): - c = self.create_cache(10, 1, {"A": "B", "C": "D"}) + c = 
self.create_cache(10, {"A": "B", "C": "D"}, global_ttl=1) assert c.global_ttl == 1 assert "A" in c @@ -968,14 +978,14 @@ def test_global_ttl_with_iterable(self): def test_oldest_item_evicted_on_overflow(self): """When capacity is exceeded, the first inserted key must be evicted.""" - cache = self.create_cache(3, 10, [(1, "a"), (2, "b"), (3, "c")]) + cache = self.create_cache(3, [(1, "a"), (2, "b"), (3, "c")], global_ttl=10) cache[4] = "d" # triggers eviction of key 1 assert 1 not in cache assert 4 in cache def test_eviction_is_strictly_insertion_ordered(self): """Keys evict in the exact order they were inserted, not access order.""" - cache = self.create_cache(3, 10, [(1, "a"), (2, "b"), (3, "c")]) + cache = self.create_cache(3, [(1, "a"), (2, "b"), (3, "c")], global_ttl=10) cache[4] = "d" # evicts 1 cache[5] = "e" # evicts 2 @@ -991,7 +1001,7 @@ def test_accessing_key_does_not_reset_eviction_priority(self): Unlike LRU, a cache hit must NOT push the key to the back. Key 1 is accessed repeatedly but must still be the first evicted. """ - cache = self.create_cache(3, 10, [(1, "a"), (2, "b"), (3, "c")]) + cache = self.create_cache(3, [(1, "a"), (2, "b"), (3, "c")], global_ttl=10) _ = cache[1] _ = cache[1] @@ -1005,7 +1015,7 @@ def test_overwriting_existing_key_does_not_change_eviction_order(self): Updating the value of an existing key must NOT change its insertion position in the eviction queue. 
""" - cache = self.create_cache(3, 10, [(1, "a"), (2, "b"), (3, "c")]) + cache = self.create_cache(3, [(1, "a"), (2, "b"), (3, "c")], global_ttl=10) cache[1] = "updated" # update, not a new insertion cache[4] = "d" # must still evict key 1 @@ -1015,7 +1025,7 @@ def test_overwriting_existing_key_does_not_change_eviction_order(self): def test_popitem_removes_oldest(self): """popitem() must always remove and return the oldest inserted entry.""" - cache = self.create_cache(3, 10, [(10, "x"), (20, "y"), (30, "z")]) + cache = self.create_cache(3, [(10, "x"), (20, "y"), (30, "z")], global_ttl=10) key, value = cache.popitem() assert key == 10 assert value == "x" @@ -1023,13 +1033,13 @@ def test_popitem_removes_oldest(self): def test_popitem_successive_calls_follow_fifo(self): """Successive popitem() calls must yield keys in insertion order.""" insertion_order = [(1, "a"), (2, "b"), (3, "c"), (4, "d")] - cache = self.create_cache(4, 10, insertion_order) + cache = self.create_cache(4, insertion_order, global_ttl=10) popped_keys = [cache.popitem()[0] for _ in range(4)] assert popped_keys == [1, 2, 3, 4] def test_drain_removes_n_oldest(self): """drain(n) must remove exactly n items, oldest-first.""" - cache = self.create_cache(5, 10, [(i, str(i)) for i in range(1, 6)]) + cache = self.create_cache(5, [(i, str(i)) for i in range(1, 6)], global_ttl=10) removed = cache.drain(3) assert removed == 3 assert 1 not in cache @@ -1039,16 +1049,18 @@ def test_drain_removes_n_oldest(self): assert 5 in cache def test_first_returns_oldest_key(self): - cache = self.create_cache(3, 10, [(7, "a"), (8, "b"), (9, "c")]) + cache = self.create_cache(3, [(7, "a"), (8, "b"), (9, "c")], global_ttl=10) assert cache.first() == 7 def test_last_returns_newest_key(self): - cache = self.create_cache(3, 10, [(7, "a"), (8, "b"), (9, "c")]) + cache = self.create_cache(3, [(7, "a"), (8, "b"), (9, "c")], global_ttl=10) assert cache.last() == 9 def test_first_with_positive_n_browses_in_insertion_order(self): 
"""first(n) must walk forward through insertion order.""" - cache = self.create_cache(4, 10, [(10, "a"), (20, "b"), (30, "c"), (40, "d")]) + cache = self.create_cache( + 4, [(10, "a"), (20, "b"), (30, "c"), (40, "d")], global_ttl=10 + ) assert cache.first(0) == 10 assert cache.first(1) == 20 assert cache.first(2) == 30 @@ -1056,23 +1068,25 @@ def test_first_with_positive_n_browses_in_insertion_order(self): def test_first_with_negative_n_browses_from_end(self): """first(-1) is an alias for last(); first(-2) is the second newest.""" - cache = self.create_cache(4, 10, [(10, "a"), (20, "b"), (30, "c"), (40, "d")]) + cache = self.create_cache( + 4, [(10, "a"), (20, "b"), (30, "c"), (40, "d")], global_ttl=10 + ) assert cache.first(-1) == 40 assert cache.first(-2) == 30 def test_first_after_eviction_reflects_new_head(self): """After an eviction, first() must return the new oldest key.""" - cache = self.create_cache(3, 10, [(1, "a"), (2, "b"), (3, "c")]) + cache = self.create_cache(3, [(1, "a"), (2, "b"), (3, "c")], global_ttl=10) cache[4] = "d" # evicts key 1 assert cache.first() == 2 def test_last_after_insertion_reflects_new_tail(self): - cache = self.create_cache(3, 10, [(1, "a"), (2, "b"), (3, "c")]) + cache = self.create_cache(3, [(1, "a"), (2, "b"), (3, "c")], global_ttl=10) cache[4] = "d" assert cache.last() == 4 def test_first_on_single_element_cache(self): - cache = self.create_cache(1, 10, [(42, "only")]) + cache = self.create_cache(1, [(42, "only")], global_ttl=10) assert cache.first() == 42 assert cache.last() == 42 @@ -1099,7 +1113,7 @@ def test_rolling_window_maintains_correct_contents(self): def test_no_phantom_keys_after_eviction(self): """Evicted keys must not linger in contains() or iteration.""" - cache = self.create_cache(2, 10, [(1, "a"), (2, "b")]) + cache = self.create_cache(2, [(1, "a"), (2, "b")], global_ttl=10) cache[3] = "c" # evicts 1 for key in cache: @@ -1112,7 +1126,7 @@ def test_reinsert_evicted_key_rejoins_at_tail(self): Re-inserting a 
previously evicted key must treat it as a brand-new entry positioned at the back of the queue. """ - cache = self.create_cache(3, 10, [(1, "a"), (2, "b"), (3, "c")]) + cache = self.create_cache(3, [(1, "a"), (2, "b"), (3, "c")], global_ttl=10) cache[4] = "d" # evicts 1 cache[1] = "re" # re-insert 1 — should now be at the tail cache[5] = "e" # must evict 2 (now the oldest), not 1 @@ -1122,7 +1136,7 @@ def test_reinsert_evicted_key_rejoins_at_tail(self): assert cache[1] == "re" def test_is_full_triggers_at_maxsize(self): - cache = self.create_cache(3, 10, [(1, "a"), (2, "b"), (3, "c")]) + cache = self.create_cache(3, [(1, "a"), (2, "b"), (3, "c")]) assert cache.is_full() cache[4] = "d" # eviction should keep it full, not overflow assert cache.is_full() @@ -1137,7 +1151,7 @@ def test_len_never_exceeds_maxsize(self): def test_clear_resets_fifo_order(self): """After clear(), the insertion order restarts from scratch.""" - cache = self.create_cache(3, 10, [(1, "a"), (2, "b"), (3, "c")]) + cache = self.create_cache(3, [(1, "a"), (2, "b"), (3, "c")]) cache.clear() cache[10] = "x" cache[20] = "y" @@ -1206,7 +1220,7 @@ def test_edge_case_of_front_offset_overflow(self): assert oldest_val == oldest_key * 10 def test_global_ttl_on_insert(self): - obj = self.create_cache(2, 0.5) + obj = self.create_cache(2, global_ttl=0.5) assert obj.global_ttl == 0.5 obj.insert(0, 1) @@ -1215,7 +1229,7 @@ def test_global_ttl_on_insert(self): with pytest.raises(KeyError): obj[0] - obj = self.create_cache(2, 20) + obj = self.create_cache(2, global_ttl=20) obj.insert(0, 0) obj.insert(1, 1) @@ -1225,7 +1239,7 @@ def test_global_ttl_on_insert(self): assert (1, 1) == obj.popitem() def test_global_ttl_on_update(self): - obj = self.create_cache(2, 0.5) + obj = self.create_cache(2, global_ttl=0.5) # maxsize=2 - (1, 1) should be evicated because obj.update((i + 1, i + 1) for i in range(3)) @@ -1242,7 +1256,7 @@ def test_global_ttl_on_update(self): obj[3] def test_get_with_expire(self): - obj = 
self.create_cache(2, 10) + obj = self.create_cache(2, global_ttl=10) obj.insert(1, 1) time.sleep(0.1) @@ -1259,7 +1273,7 @@ def test_get_with_expire(self): assert 0 == dur def test_pop_with_expire(self): - obj = self.create_cache(2, 10) + obj = self.create_cache(2, global_ttl=10) obj.insert(1, 1) time.sleep(0.1) @@ -1276,7 +1290,7 @@ def test_pop_with_expire(self): assert 0 == dur def test_popitem_with_expire(self): - obj = self.create_cache(2, 10) + obj = self.create_cache(2, global_ttl=10) obj.insert(1, 1) obj.insert(2, 2) @@ -1295,7 +1309,7 @@ def test_popitem_with_expire(self): def test_items_with_expire(self): # no need to test completely items_with_expire # because it's tested in test_iterators - obj = self.create_cache(10, 3, {1: 2, 3: 4}) + obj = self.create_cache(10, {1: 2, 3: 4}) for key, val, ttl in obj.items_with_expire(): assert key in obj assert val == obj[key] @@ -1309,8 +1323,6 @@ def test_sweep_interval(self): time.sleep(3.5) assert len(obj) == 0 - # TODO: more tests for sweep_interval - class TestVTTLCache( mixins.InitializeMixin, @@ -1344,13 +1356,14 @@ def create_cache( ) -class TestVTTLCachePolicy(mixins.BaseMixin): +class TestVTTLCachePolicy(mixins.SweepIntervalMixin): def create_cache( self, maxsize: int = 10, iterable: typing.Any = None, capacity: int = 0, getsizeof: typing.Any = None, + sweep_interval: float | timedelta | None = None, ) -> cachebox.VTTLCache: return cachebox.VTTLCache( maxsize, @@ -1358,6 +1371,7 @@ def create_cache( 100, capacity=capacity, getsizeof=getsizeof, + sweep_interval=sweep_interval, ) def test_item_accessible_before_ttl(self): @@ -1391,13 +1405,6 @@ def test_no_ttl_item_never_expires(self): time.sleep(0.1) assert c["k"] == "v" - # def test_expired_item_excluded_from_len(self): - # c = self.create_cache() - # c.insert("a", 1, ttl=0.1) - # c.insert("b", 2) - # time.sleep(0.15) - # assert len(c) == 1 - def test_expired_key_not_in_contains(self): c = self.create_cache() c.insert("k", "v", ttl=0.1) @@ -1707,5 +1714,3 
@@ def test_sweep_interval(self): assert len(obj) == 3 time.sleep(3.5) assert len(obj) == 0 - - # TODO: more tests for sweep_interval From c051b4cb94fae801b5f0e12e8b7f1203a0405c48 Mon Sep 17 00:00:00 2001 From: awolverp Date: Thu, 28 May 2026 20:22:03 +0330 Subject: [PATCH 32/60] Support pickle & deepcopy for Cache/RRCache Old pickled bytes are incompatible with new version --- cachebox/_core.pyi | 2 + src/internal/pickle.rs | 383 +++++++++++++++---------------------- src/internal/utils.rs | 1 + src/policies/fifopolicy.rs | 20 ++ src/policies/lfupolicy.rs | 19 ++ src/policies/lrupolicy.rs | 20 ++ src/policies/nopolicy.rs | 50 +++++ src/policies/rrpolicy.rs | 50 +++++ src/policies/traits.rs | 22 ++- src/policies/ttlpolicy.rs | 18 ++ src/policies/vttlpolicy.rs | 18 ++ src/policies/wrapped.rs | 38 ++++ src/pyclasses/cache.rs | 11 ++ src/pyclasses/fifocache.rs | 11 ++ src/pyclasses/lfucache.rs | 11 ++ src/pyclasses/lrucache.rs | 11 ++ src/pyclasses/rrcache.rs | 11 ++ src/pyclasses/ttlcache.rs | 11 ++ src/pyclasses/vttlcache.rs | 11 ++ tests/mixins.py | 91 +++++++++ 20 files changed, 582 insertions(+), 227 deletions(-) diff --git a/cachebox/_core.pyi b/cachebox/_core.pyi index e91c5c5..375c41c 100644 --- a/cachebox/_core.pyi +++ b/cachebox/_core.pyi @@ -217,6 +217,8 @@ class BaseCacheImpl(typing.Generic[KT, VT]): def __iter__(self) -> typing.Iterator[KT]: ... def copy(self) -> typing.Self: ... def __copy__(self) -> typing.Self: ... + def __getstate__(self) -> object: ... + def __setstate__(self, state: object) -> None: ... def __repr__(self) -> str: ... class Cache(BaseCacheImpl[KT, VT]): diff --git a/src/internal/pickle.rs b/src/internal/pickle.rs index 55f6f2a..2a92842 100644 --- a/src/internal/pickle.rs +++ b/src/internal/pickle.rs @@ -1,68 +1,96 @@ -//! There are utilities for creating and loading pickle states and objects. - use std::ptr; use crate::internal::alias; -/// A simple Python scalar value. 
-/// -/// | Rust type | Python type | -/// |-----------|-------------| -/// | `usize` | `int` | -/// | `isize` | `int` | -/// | `f64` | `float` | -/// | `bool` | `bool` | -/// | `&str` | `str` | -/// -/// [`PyVal::None`] maps to Python's `None`. -#[derive(Debug, Clone, Copy)] -pub enum PyVal<'a> { +pub enum PyPickleVal<'a> { + Owned(alias::PyObject), + Borrowed(&'a alias::PyObject), + Str(&'a str), Unsigned(usize), Signed(isize), Float(f64), Bool(bool), - Str(&'a str), None, } -impl From for PyVal<'static> { +impl From for PyPickleVal<'static> { + #[inline] fn from(v: usize) -> Self { - PyVal::Unsigned(v) + PyPickleVal::Unsigned(v) } } -impl From for PyVal<'static> { +impl From for PyPickleVal<'static> { + #[inline] fn from(v: isize) -> Self { - PyVal::Signed(v) + PyPickleVal::Signed(v) } } -impl From for PyVal<'static> { +impl From for PyPickleVal<'static> { fn from(v: f64) -> Self { - PyVal::Float(v) + PyPickleVal::Float(v) + } +} +impl From for PyPickleVal<'static> { + #[inline] + fn from(v: std::time::Duration) -> Self { + v.as_secs_f64().into() } } -impl From for PyVal<'static> { +impl From for PyPickleVal<'static> { + #[inline] fn from(v: bool) -> Self { - PyVal::Bool(v) + PyPickleVal::Bool(v) } } -impl<'a> From<&'a str> for PyVal<'a> { +impl<'a> From<&'a str> for PyPickleVal<'a> { + #[inline] fn from(v: &'a str) -> Self { - PyVal::Str(v) + PyPickleVal::Str(v) + } +} +impl<'a> From<&'a alias::PyObject> for PyPickleVal<'a> { + #[inline] + fn from(v: &'a alias::PyObject) -> Self { + PyPickleVal::Borrowed(v) + } +} +impl From for PyPickleVal<'static> { + #[inline] + fn from(v: alias::PyObject) -> Self { + PyPickleVal::Owned(v) + } +} +impl<'a, I> From> for PyPickleVal<'a> +where + I: Into>, +{ + #[inline] + fn from(value: Option) -> Self { + match value { + Some(x) => x.into(), + None => Self::None, + } } } -impl<'a> PyVal<'a> { - /// Allocate a fresh owned Python object. 
The caller is responsible for - /// exactly one `Py_DECREF` (or transferring ownership to a container). - pub(crate) unsafe fn into_py_raw( - self, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult<*mut pyo3::ffi::PyObject> { +// private methods +impl<'a> PyPickleVal<'a> { + /// Allocate a fresh owned Python object. + /// + /// # Safety + /// The caller is responsible for exactly one `Py_DECREF` (or transferring ownership to a container). + unsafe fn into_py_raw(self, py: pyo3::Python<'_>) -> pyo3::PyResult<*mut pyo3::ffi::PyObject> { let ptr = match self { - PyVal::Unsigned(v) => pyo3::ffi::PyLong_FromSize_t(v), - PyVal::Signed(v) => pyo3::ffi::PyLong_FromSsize_t(v), - PyVal::Float(v) => pyo3::ffi::PyFloat_FromDouble(v), - PyVal::Bool(v) => { + Self::Owned(v) => v.into_ptr(), + Self::Borrowed(v) => { + let ptr = v.as_ptr(); + pyo3::ffi::Py_INCREF(ptr); + ptr + } + Self::Unsigned(v) => pyo3::ffi::PyLong_FromSize_t(v), + Self::Signed(v) => pyo3::ffi::PyLong_FromSsize_t(v), + Self::Float(v) => pyo3::ffi::PyFloat_FromDouble(v), + Self::Bool(v) => { // Py_True / Py_False are singletons; INCREF to hand out our own ref. let raw = if v { pyo3::ffi::Py_True() @@ -72,14 +100,14 @@ impl<'a> PyVal<'a> { pyo3::ffi::Py_INCREF(raw); raw } - PyVal::Str(v) => pyo3::ffi::PyUnicode_FromStringAndSize( + Self::Str(v) => pyo3::ffi::PyUnicode_FromStringAndSize( v.as_ptr() as *const std::os::raw::c_char, v.len() as isize, ), - PyVal::None => { - let raw = pyo3::ffi::Py_None(); - pyo3::ffi::Py_INCREF(raw); - raw + Self::None => { + let none = pyo3::ffi::Py_None(); + pyo3::ffi::Py_INCREF(none); + none } }; @@ -91,24 +119,17 @@ impl<'a> PyVal<'a> { } } -/// A finalised pickle state — an immutable wrapper around a Python tuple. +/// A finalised pickle state - an immutable wrapper around a Python tuple. /// /// Construct with [`Pickle::builder`]. 
-/// -/// # Immutable access -/// -/// `Pickle` implements [`Deref`] and [`AsRef`] targeting the inner -/// [`alias::PyObject`], so you can pass it wherever a `PyObject` reference is -/// expected without an explicit conversion. Typed access is available via -/// [`Pickle::as_object`] and [`Pickle::as_tuple`]. -/// -/// [`Deref`]: std::ops::Deref +#[repr(transparent)] pub struct Pickle(alias::PyObject); impl Pickle { /// Begin building a top-level pickle tuple with exactly `size` slots. - pub fn builder(py: pyo3::Python<'_>, size: isize) -> pyo3::PyResult { - PickleBuilder::new(py, size) + #[inline] + pub fn builder(py: pyo3::Python<'_>, size: usize) -> pyo3::PyResult { + PickleBuilder::new(py, size as isize) } /// Borrow the inner [`alias::PyObject`] without consuming `self`. @@ -135,27 +156,27 @@ impl AsRef for Pickle { } impl From for alias::PyObject { + #[inline] fn from(v: Pickle) -> Self { v.0 } } -// All three sequence-like builders (PickleBuilder, TupleBuilder, ListBuilder) -// expose the same `push` / `push_tuple` / `push_list` / `push_dict` surface. -// Rather than repeating three times, we generate them with a macro. -// -// Each builder must provide an **inherent** method: -// -// unsafe fn push_owned_impl( -// &mut self, -// py: pyo3::Python<'_>, -// item: *mut pyo3::ffi::PyObject, // caller hands over ownership -// ) -> pyo3::PyResult<()> - +/// Most of builders expose the same `push` / `push_tuple` / `push_list` / `push_dict` surface. +/// Rather than repeating three times, generate them with a macro. +/// +/// Each builder must provide a method: +/// ```ignore +/// unsafe fn push_owned_impl( +/// &mut self, +/// py: pyo3::Python<'_>, +/// item: *mut pyo3::ffi::PyObject, // caller hands over ownership +/// ) -> pyo3::PyResult<()> +/// ``` macro_rules! impl_push_methods { ($ty:ident) => { impl $ty { - /// Push a scalar [`PyVal`] (or anything that converts `Into`). + /// Push a scalar [`PyPickleVal`] (or anything that converts `Into`). 
/// /// ```rust,ignore /// builder.push(py, 42isize)? @@ -164,7 +185,7 @@ macro_rules! impl_push_methods { /// ``` pub fn push<'a, V>(&mut self, py: pyo3::Python<'_>, val: V) -> pyo3::PyResult<&mut Self> where - V: Into>, + V: Into>, { let raw = unsafe { val.into().into_py_raw(py)? }; unsafe { @@ -252,37 +273,6 @@ macro_rules! impl_push_methods { /// /// If the builder is dropped before `finish` is called, the partially-built /// tuple is correctly decreffed and all already-inserted items are released. -/// -/// # Example -/// -/// Reproduces `(4567, 23343, {3: 4, "a": 39, "AA": (3, 4)}, [2, 3, 4, (4, 5), "A"])`: -/// -/// ```rust,ignore -/// let pickle = Pickle::builder(py, 4)? -/// .push(py, 4567usize)? -/// .push(py, 23343usize)? -/// .push_dict(py, |d| { -/// d.entry(py, 3isize, 4isize)? -/// .entry(py, "a", 39isize)? -/// .entry_tuple(py, "AA", 2, |t| { -/// t.push(py, 3isize)?.push(py, 4isize)?; -/// Ok(()) -/// })?; -/// Ok(()) -/// })? -/// .push_list(py, |l| { -/// l.push(py, 2isize)? -/// .push(py, 3isize)? -/// .push(py, 4isize)? -/// .push_tuple(py, 2, |t| { -/// t.push(py, 4isize)?.push(py, 5isize)?; -/// Ok(()) -/// })? -/// .push(py, "A")?; -/// Ok(()) -/// })? -/// .finish(py); -/// ``` pub struct PickleBuilder { /// `None` only after `finish()` has transferred ownership. 
inner: Option>, @@ -290,20 +280,22 @@ pub struct PickleBuilder { current: isize, } +// private methods impl PickleBuilder { fn new(py: pyo3::Python<'_>, size: isize) -> pyo3::PyResult { let raw = unsafe { pyo3::ffi::PyTuple_New(size) }; if raw.is_null() { - return Err(pyo3::PyErr::fetch(py)); + Err(pyo3::PyErr::fetch(py)) + } else { + Ok(Self { + inner: Some(unsafe { ptr::NonNull::new_unchecked(raw) }), + size, + current: 0, + }) } - Ok(Self { - inner: Some(unsafe { ptr::NonNull::new_unchecked(raw) }), - size, - current: 0, - }) } - /// # Reference-count contract + /// # Safety /// `PyTuple_SetItem` **steals** `item` on success and **decrefs** it on /// failure, so this function must not touch `item`'s refcount after the call. unsafe fn push_owned_impl( @@ -323,11 +315,10 @@ impl PickleBuilder { self.current += 1; Ok(()) } +} +impl PickleBuilder { /// Finalise the builder into a [`Pickle`]. - /// - /// # Panics (debug only) - /// Panics if some slots were never filled. pub fn finish(mut self, py: pyo3::Python<'_>) -> Pickle { debug_assert_eq!( self.current, @@ -335,12 +326,15 @@ impl PickleBuilder { "PickleBuilder::finish called with {} unfilled slot(s)", self.size - self.current, ); - // Take ownership — Drop will be a no-op (inner == None). + + // Take ownership + // `.take()` makes Drop no-op let ptr = self .inner .take() .expect("PickleBuilder already consumed") .as_ptr(); + let bound = unsafe { pyo3::Bound::from_owned_ptr(py, ptr) }; Pickle(bound.unbind()) } @@ -360,32 +354,17 @@ impl Drop for PickleBuilder { } /// Builds a Python tuple for embedding inside another container. -/// -/// Can also be used standalone via [`TupleBuilder::build`], which returns a -/// plain [`alias::PyObject`] (a Python `tuple`). pub struct TupleBuilder { inner: Option>, size: isize, current: isize, } +// private methods impl TupleBuilder { - /// Allocate a new tuple with `size` pre-allocated slots. 
- pub fn new(py: pyo3::Python<'_>, size: isize) -> pyo3::PyResult { - let raw = unsafe { pyo3::ffi::PyTuple_New(size) }; - if raw.is_null() { - return Err(pyo3::PyErr::fetch(py)); - } - Ok(Self { - inner: Some(unsafe { ptr::NonNull::new_unchecked(raw) }), - size, - current: 0, - }) - } - /// Consume the builder and surrender ownership of the raw pointer to the /// caller (used internally to insert into a parent container). - pub(crate) fn into_raw(mut self) -> *mut pyo3::ffi::PyObject { + fn into_raw(mut self) -> *mut pyo3::ffi::PyObject { // Drop becomes a no-op because `inner` is now None. self.inner .take() @@ -409,25 +388,21 @@ impl TupleBuilder { self.current += 1; Ok(()) } +} - /// Finalise into a standalone Python tuple object. - /// - /// # Panics (debug only) - /// Panics if some slots were never filled. - pub fn build(mut self, py: pyo3::Python<'_>) -> alias::PyObject { - debug_assert_eq!( - self.current, - self.size, - "TupleBuilder::build called with {} unfilled slot(s)", - self.size - self.current, - ); - let ptr = self - .inner - .take() - .expect("TupleBuilder already consumed") - .as_ptr(); - let bound = unsafe { pyo3::Bound::from_owned_ptr(py, ptr) }; - bound.unbind() +impl TupleBuilder { + /// Allocate a new tuple with `size` pre-allocated slots. + pub fn new(py: pyo3::Python<'_>, size: isize) -> pyo3::PyResult { + let raw = unsafe { pyo3::ffi::PyTuple_New(size) }; + if raw.is_null() { + Err(pyo3::PyErr::fetch(py)) + } else { + Ok(Self { + inner: Some(unsafe { ptr::NonNull::new_unchecked(raw) }), + size, + current: 0, + }) + } } } @@ -452,26 +427,16 @@ pub struct ListBuilder { inner: Option>, } +// private methods impl ListBuilder { - /// Create a new, empty list. 
- pub fn new(py: pyo3::Python<'_>) -> pyo3::PyResult { - let raw = unsafe { pyo3::ffi::PyList_New(0) }; - if raw.is_null() { - return Err(pyo3::PyErr::fetch(py)); - } - Ok(Self { - inner: Some(unsafe { ptr::NonNull::new_unchecked(raw) }), - }) - } - - pub(crate) fn into_raw(mut self) -> *mut pyo3::ffi::PyObject { + fn into_raw(mut self) -> *mut pyo3::ffi::PyObject { self.inner .take() .expect("ListBuilder already consumed") .as_ptr() } - /// # Reference-count contract + /// # Safety /// `PyList_Append` does **not** steal `item`; it increments `item`'s refcount /// on success. We therefore always decref our owned ref after the call, /// regardless of success or failure. @@ -488,16 +453,18 @@ impl ListBuilder { } Ok(()) } +} - /// Finalise into a standalone Python list object. - pub fn build(mut self, py: pyo3::Python<'_>) -> alias::PyObject { - let ptr = self - .inner - .take() - .expect("ListBuilder already consumed") - .as_ptr(); - let bound = unsafe { pyo3::Bound::from_owned_ptr(py, ptr) }; - bound.unbind() +impl ListBuilder { + /// Create a new, empty list. + pub fn new(py: pyo3::Python<'_>) -> pyo3::PyResult { + let raw = unsafe { pyo3::ffi::PyList_New(0) }; + if raw.is_null() { + return Err(pyo3::PyErr::fetch(py)); + } + Ok(Self { + inner: Some(unsafe { ptr::NonNull::new_unchecked(raw) }), + }) } } @@ -515,48 +482,23 @@ impl Drop for ListBuilder { /// Builds a Python dict. /// -/// Keys must be [`PyVal`] scalars (integers, floats, bools, strings, `None`). +/// Keys must be [`PyPickleVal`] scalars (integers, floats, bools, strings, `None`). /// Values may be scalars **or** nested containers built via the `entry_tuple`, /// `entry_list`, and `entry_dict` methods. -/// -/// # Example -/// -/// Reproduces `{3: 4, "a": 39, "AA": (3, 4)}`: -/// -/// ```rust,ignore -/// let obj = DictBuilder::new(py)? -/// .entry(py, 3isize, 4isize)? -/// .entry(py, "a", 39isize)? -/// .entry_tuple(py, "AA", 2, |t| { -/// t.push(py, 3isize)?.push(py, 4isize)?; -/// Ok(()) -/// })? 
-/// .build(py); -/// ``` pub struct DictBuilder { inner: Option>, } +// private methods impl DictBuilder { - /// Create a new, empty dict. - pub fn new(py: pyo3::Python<'_>) -> pyo3::PyResult { - let raw = unsafe { pyo3::ffi::PyDict_New() }; - if raw.is_null() { - return Err(pyo3::PyErr::fetch(py)); - } - Ok(Self { - inner: Some(unsafe { ptr::NonNull::new_unchecked(raw) }), - }) - } - - pub(crate) fn into_raw(mut self) -> *mut pyo3::ffi::PyObject { + fn into_raw(mut self) -> *mut pyo3::ffi::PyObject { self.inner .take() .expect("DictBuilder already consumed") .as_ptr() } - /// # Reference-count contract + /// # Safety /// `PyDict_SetItem` does **not** steal either `key` or `val`. /// This helper takes ownership of both and decrefs them unconditionally. unsafe fn set_kv( @@ -576,14 +518,21 @@ impl DictBuilder { Ok(()) } } +} - /// Insert `key → val` where both are [`PyVal`] scalars. - /// - /// ```rust,ignore - /// d.entry(py, 3isize, 4isize)? - /// .entry(py, "name", "Alice")? - /// .entry(py, true, 1.0f64)?; - /// ``` +impl DictBuilder { + /// Create a new, empty dict. + pub fn new(py: pyo3::Python<'_>) -> pyo3::PyResult { + let raw = unsafe { pyo3::ffi::PyDict_New() }; + if raw.is_null() { + return Err(pyo3::PyErr::fetch(py)); + } + Ok(Self { + inner: Some(unsafe { ptr::NonNull::new_unchecked(raw) }), + }) + } + + /// Insert `key → val` where both are [`PyPickleVal`] scalars. pub fn entry<'k, 'v, K, V>( &mut self, py: pyo3::Python<'_>, @@ -591,8 +540,8 @@ impl DictBuilder { val: V, ) -> pyo3::PyResult<&mut Self> where - K: Into>, - V: Into>, + K: Into>, + V: Into>, { unsafe { let kptr = key.into().into_py_raw(py)?; @@ -609,13 +558,6 @@ impl DictBuilder { } /// Insert `key → (nested tuple)`. 
- /// - /// ```rust,ignore - /// d.entry_tuple(py, "coords", 2, |t| { - /// t.push(py, 10isize)?.push(py, 20isize)?; - /// Ok(()) - /// })?; - /// ``` pub fn entry_tuple<'k, K, F>( &mut self, py: pyo3::Python<'_>, @@ -624,7 +566,7 @@ impl DictBuilder { f: F, ) -> pyo3::PyResult<&mut Self> where - K: Into>, + K: Into>, F: FnOnce(&mut TupleBuilder) -> pyo3::PyResult<()>, { let mut b = TupleBuilder::new(py, size)?; @@ -651,7 +593,7 @@ impl DictBuilder { f: F, ) -> pyo3::PyResult<&mut Self> where - K: Into>, + K: Into>, F: FnOnce(&mut ListBuilder) -> pyo3::PyResult<()>, { let mut b = ListBuilder::new(py)?; @@ -678,7 +620,7 @@ impl DictBuilder { f: F, ) -> pyo3::PyResult<&mut Self> where - K: Into>, + K: Into>, F: FnOnce(&mut DictBuilder) -> pyo3::PyResult<()>, { let mut b = DictBuilder::new(py)?; @@ -696,17 +638,6 @@ impl DictBuilder { } Ok(self) } - - /// Finalise into a standalone Python dict object. - pub fn build(mut self, py: pyo3::Python<'_>) -> alias::PyObject { - let ptr = self - .inner - .take() - .expect("DictBuilder already consumed") - .as_ptr(); - let bound = unsafe { pyo3::Bound::from_owned_ptr(py, ptr) }; - bound.unbind() - } } impl Drop for DictBuilder { diff --git a/src/internal/utils.rs b/src/internal/utils.rs index 1499193..77544d0 100644 --- a/src/internal/utils.rs +++ b/src/internal/utils.rs @@ -395,6 +395,7 @@ impl From for alias::PyObject { } /// Holds and manage `getsizeof` function which is a callable used to measure the /// size of each key-value pair. 
+#[derive(pyo3::FromPyObject)] #[repr(transparent)] pub struct GetsizeofFunction(Option); diff --git a/src/policies/fifopolicy.rs b/src/policies/fifopolicy.rs index c26300a..4e8fd62 100644 --- a/src/policies/fifopolicy.rs +++ b/src/policies/fifopolicy.rs @@ -1,6 +1,7 @@ use std::collections::VecDeque; use crate::hashbrown; +use crate::internal::alias; use crate::internal::utils; use crate::policies::traits; use crate::policies::traits::HandleExt; @@ -242,6 +243,8 @@ impl PolicyExt for FIFOPolicy { where Self: 'a; + const PICKLE_SIZE: isize = 2; + #[inline] fn current_size(&self) -> usize { self.currsize @@ -390,4 +393,21 @@ impl PolicyExt for FIFOPolicy { front_offset: self.front_offset, } } + + fn build_pickle( + &self, + py: pyo3::Python, + tuple: &mut crate::internal::pickle::TupleBuilder, + ) -> pyo3::PyResult<()> { + todo!() + } + + fn from_pickle( + maxsize: usize, + getsizeof: Option, + global_ttl: Option, + builded: pyo3::Bound<'_, pyo3::types::PyTuple>, + ) -> pyo3::PyResult<(Self::Shared, Self)> { + todo!() + } } diff --git a/src/policies/lfupolicy.rs b/src/policies/lfupolicy.rs index cac2555..804d619 100644 --- a/src/policies/lfupolicy.rs +++ b/src/policies/lfupolicy.rs @@ -307,6 +307,8 @@ impl PolicyExt for LFUPolicy { where Self: 'a; + const PICKLE_SIZE: isize = 1; + #[inline] fn current_size(&self) -> usize { self.currsize @@ -470,4 +472,21 @@ impl PolicyExt for LFUPolicy { currsize: self.currsize, } } + + fn build_pickle( + &self, + py: pyo3::Python, + tuple: &mut crate::internal::pickle::TupleBuilder, + ) -> pyo3::PyResult<()> { + todo!() + } + + fn from_pickle( + maxsize: usize, + getsizeof: Option, + global_ttl: Option, + builded: pyo3::Bound<'_, pyo3::types::PyTuple>, + ) -> pyo3::PyResult<(Self::Shared, Self)> { + todo!() + } } diff --git a/src/policies/lrupolicy.rs b/src/policies/lrupolicy.rs index e520678..989a4ad 100644 --- a/src/policies/lrupolicy.rs +++ b/src/policies/lrupolicy.rs @@ -1,4 +1,5 @@ use crate::hashbrown; +use 
crate::internal::alias; use crate::internal::linked_list; use crate::internal::utils; use crate::policies::traits; @@ -155,6 +156,8 @@ impl PolicyExt for LRUPolicy { where Self: 'a; + const PICKLE_SIZE: isize = 1; + #[inline] fn current_size(&self) -> usize { self.currsize @@ -310,4 +313,21 @@ impl PolicyExt for LRUPolicy { currsize: self.currsize, } } + + fn build_pickle( + &self, + py: pyo3::Python, + tuple: &mut crate::internal::pickle::TupleBuilder, + ) -> pyo3::PyResult<()> { + todo!() + } + + fn from_pickle( + maxsize: usize, + getsizeof: Option, + global_ttl: Option, + builded: pyo3::Bound<'_, pyo3::types::PyTuple>, + ) -> pyo3::PyResult<(Self::Shared, Self)> { + todo!() + } } diff --git a/src/policies/nopolicy.rs b/src/policies/nopolicy.rs index ad8ec27..cd8319a 100644 --- a/src/policies/nopolicy.rs +++ b/src/policies/nopolicy.rs @@ -1,4 +1,5 @@ use crate::hashbrown; +use crate::internal::alias; use crate::internal::utils; use crate::policies::traits; use crate::policies::traits::HandleExt; @@ -120,6 +121,8 @@ impl traits::PolicyExt for NoPolicy { where Self: 'a; + const PICKLE_SIZE: isize = 1; + #[inline] fn current_size(&self) -> usize { self.currsize @@ -238,4 +241,51 @@ impl traits::PolicyExt for NoPolicy { currsize: self.currsize, } } + + fn build_pickle( + &self, + py: pyo3::Python, + tuple: &mut crate::internal::pickle::TupleBuilder, + ) -> pyo3::PyResult<()> { + tuple.push_dict(py, |dict| unsafe { + for handle in self.table.iter().map(|x| x.as_ref()) { + dict.entry(py, handle.key().as_ref(), handle.value())?; + } + Ok(()) + })?; + Ok(()) + } + + fn from_pickle( + maxsize: usize, + getsizeof: Option, + _global_ttl: Option, + builded: pyo3::Bound<'_, pyo3::types::PyTuple>, + ) -> pyo3::PyResult<(Self::Shared, Self)> { + use pyo3::types::PyDictMethods; + use pyo3::types::PyTupleMethods; + + let dict = builded.get_item(0)?.cast_into::()?; + let dict_length = dict.len(); + + if dict_length > maxsize { + return Err(new_py_error!( + PyValueError, + "dict 
size is incompatible with maxsize" + )); + } + + let shared = Shared::new(maxsize, getsizeof); + let mut slf = Self::new(dict.len()); + + for (key, value) in dict.iter() { + let handle = Handle::new(key.py(), shared.getsizeof(), key.unbind(), value.unbind())?; + + unsafe { + slf.table.insert_no_grow(handle.key().hash(), handle); + } + } + + Ok((shared, slf)) + } } diff --git a/src/policies/rrpolicy.rs b/src/policies/rrpolicy.rs index 0c56009..2948561 100644 --- a/src/policies/rrpolicy.rs +++ b/src/policies/rrpolicy.rs @@ -1,4 +1,5 @@ use crate::hashbrown; +use crate::internal::alias; use crate::internal::utils; use crate::policies::traits; use crate::policies::traits::HandleExt; @@ -122,6 +123,8 @@ impl PolicyExt for RRPolicy { where Self: 'a; + const PICKLE_SIZE: isize = 1; + #[inline] fn current_size(&self) -> usize { self.currsize @@ -249,4 +252,51 @@ impl PolicyExt for RRPolicy { currsize: self.currsize, } } + + fn build_pickle( + &self, + py: pyo3::Python, + tuple: &mut crate::internal::pickle::TupleBuilder, + ) -> pyo3::PyResult<()> { + tuple.push_dict(py, |dict| unsafe { + for handle in self.table.iter().map(|x| x.as_ref()) { + dict.entry(py, handle.key().as_ref(), handle.value())?; + } + Ok(()) + })?; + Ok(()) + } + + fn from_pickle( + maxsize: usize, + getsizeof: Option, + _global_ttl: Option, + builded: pyo3::Bound<'_, pyo3::types::PyTuple>, + ) -> pyo3::PyResult<(Self::Shared, Self)> { + use pyo3::types::PyDictMethods; + use pyo3::types::PyTupleMethods; + + let dict = builded.get_item(0)?.cast_into::()?; + let dict_length = dict.len(); + + if dict_length > maxsize { + return Err(new_py_error!( + PyValueError, + "dict size is incompatible with maxsize" + )); + } + + let shared = Shared::new(maxsize, getsizeof); + let mut slf = Self::new(dict.len()); + + for (key, value) in dict.iter() { + let handle = Handle::new(key.py(), shared.getsizeof(), key.unbind(), value.unbind())?; + + unsafe { + slf.table.insert_no_grow(handle.key().hash(), handle); + } + } + + 
Ok((shared, slf)) + } } diff --git a/src/policies/traits.rs b/src/policies/traits.rs index 0f8a1e2..a214042 100644 --- a/src/policies/traits.rs +++ b/src/policies/traits.rs @@ -1,3 +1,5 @@ +use crate::internal::alias; +use crate::internal::pickle; use crate::internal::utils; pub trait HandleExt { @@ -76,7 +78,7 @@ pub trait SharedExt: Send + Sync { fn clone_ref(&self, py: pyo3::Python) -> Self; } -pub trait PolicyExt { +pub trait PolicyExt: Sized { /// Read-only variables, we keep this type separated from the main policy implementation, /// because we need to access them outside of `Mutex`s. type Shared: SharedExt; @@ -90,6 +92,8 @@ pub trait PolicyExt { where Self: 'a; + const PICKLE_SIZE: isize; + /// Returns the current total cumulative size consumed by all stored entries. fn current_size(&self) -> usize; @@ -134,4 +138,20 @@ pub trait PolicyExt { /// Make a clone of `self`. fn clone_ref(&mut self, py: pyo3::Python) -> Self; + + /// Buildes the pickle. + /// Should not add items to pickle more than the configured [`Self::PICKLE_SIZE`]. + fn build_pickle( + &self, + py: pyo3::Python, + tuple: &mut pickle::TupleBuilder, + ) -> pyo3::PyResult<()>; + + /// Loads the builded pickle. 
+ fn from_pickle( + maxsize: usize, + getsizeof: Option, + global_ttl: Option, + builded: pyo3::Bound<'_, pyo3::types::PyTuple>, + ) -> pyo3::PyResult<(Self::Shared, Self)>; } diff --git a/src/policies/ttlpolicy.rs b/src/policies/ttlpolicy.rs index f51fb86..c874b7c 100644 --- a/src/policies/ttlpolicy.rs +++ b/src/policies/ttlpolicy.rs @@ -360,6 +360,7 @@ impl PolicyExt for TTLPolicy { = Vacant<'a> where Self: 'a; + const PICKLE_SIZE: isize = 1; #[inline] fn current_size(&self) -> usize { @@ -523,4 +524,21 @@ impl PolicyExt for TTLPolicy { front_offset: self.front_offset, } } + + fn build_pickle( + &self, + py: pyo3::Python, + tuple: &mut crate::internal::pickle::TupleBuilder, + ) -> pyo3::PyResult<()> { + todo!() + } + + fn from_pickle( + maxsize: usize, + getsizeof: Option, + global_ttl: Option, + builded: pyo3::Bound<'_, pyo3::types::PyTuple>, + ) -> pyo3::PyResult<(Self::Shared, Self)> { + todo!() + } } diff --git a/src/policies/vttlpolicy.rs b/src/policies/vttlpolicy.rs index d4aa7f4..4a4ea4f 100644 --- a/src/policies/vttlpolicy.rs +++ b/src/policies/vttlpolicy.rs @@ -300,6 +300,7 @@ impl PolicyExt for VTTLPolicy { = Vacant<'a> where Self: 'a; + const PICKLE_SIZE: isize = 1; #[inline] fn current_size(&self) -> usize { @@ -474,4 +475,21 @@ impl PolicyExt for VTTLPolicy { currsize: self.currsize, } } + + fn build_pickle( + &self, + py: pyo3::Python, + tuple: &mut crate::internal::pickle::TupleBuilder, + ) -> pyo3::PyResult<()> { + todo!() + } + + fn from_pickle( + maxsize: usize, + getsizeof: Option, + global_ttl: Option, + builded: pyo3::Bound<'_, pyo3::types::PyTuple>, + ) -> pyo3::PyResult<(Self::Shared, Self)> { + todo!() + } } diff --git a/src/policies/wrapped.rs b/src/policies/wrapped.rs index 7d0c697..86f79aa 100644 --- a/src/policies/wrapped.rs +++ b/src/policies/wrapped.rs @@ -1,4 +1,8 @@ +use pyo3::types::PyAnyMethods; +use pyo3::types::PyTupleMethods; + use crate::internal::alias; +use crate::internal::pickle; use crate::policies::traits::HandleExt; 
use crate::policies::traits::OccupiedExt; use crate::policies::traits::PolicyEntry; @@ -251,4 +255,38 @@ impl Wrapped

{ inner: parking_lot::Mutex::new(policy), } } + + pub fn build_pickle(&self, py: pyo3::Python) -> pyo3::PyResult { + let mut builder = pickle::Pickle::builder(py, 4)?; + + let getsizeof: Option = self.shared.getsizeof().clone_ref(py).into(); + + builder + .push(py, self.shared.maxsize())? + .push(py, getsizeof)? + .push(py, self.shared.global_ttl())?; + + let policy = self.inner.lock(); + builder.push_tuple(py, P::PICKLE_SIZE, |tuple| policy.build_pickle(py, tuple))?; + + Ok(builder.finish(py)) + } +} + +impl Wrapped

{ + pub fn from_pickle(py: pyo3::Python<'_>, state: alias::PyObject) -> pyo3::PyResult { + let tuple = state.into_bound(py).cast_into::()?; + + let maxsize: usize = tuple.get_item(0)?.extract()?; + let getsizeof: Option = tuple.get_item(1)?.extract()?; + let global_ttl: Option = tuple.get_item(2)?.extract()?; + let builded = tuple.get_item(3)?.cast_into::()?; + + let (shared, inner) = P::from_pickle(maxsize, getsizeof, global_ttl, builded)?; + + Ok(Self { + shared, + inner: parking_lot::Mutex::new(inner), + }) + } } diff --git a/src/pyclasses/cache.rs b/src/pyclasses/cache.rs index cc15c40..945bc96 100644 --- a/src/pyclasses/cache.rs +++ b/src/pyclasses/cache.rs @@ -549,6 +549,17 @@ impl PyCache { self.copy(py) } + fn __getstate__(&self, py: pyo3::Python) -> pyo3::PyResult { + let inner = self.0.get(); + inner.build_pickle(py).map(|x| x.into()) + } + + fn __setstate__(&self, py: pyo3::Python, state: alias::PyObject) -> pyo3::PyResult<()> { + let wrapped = Wrapped::from_pickle(py, state)?; + self.0.set(wrapped); + Ok(()) + } + fn __repr__(slf: pyo3::PyRef<'_, Self>, py: pyo3::Python) -> String { let inner = slf.0.get(); let shared = inner.shared(); diff --git a/src/pyclasses/fifocache.rs b/src/pyclasses/fifocache.rs index fce4de4..854448f 100644 --- a/src/pyclasses/fifocache.rs +++ b/src/pyclasses/fifocache.rs @@ -554,6 +554,17 @@ impl PyFIFOCache { self.copy(py) } + fn __getstate__(&self, py: pyo3::Python) -> pyo3::PyResult { + let inner = self.0.get(); + inner.build_pickle(py).map(|x| x.into()) + } + + fn __setstate__(&self, py: pyo3::Python, state: alias::PyObject) -> pyo3::PyResult<()> { + let wrapped = Wrapped::from_pickle(py, state)?; + self.0.set(wrapped); + Ok(()) + } + fn __repr__(slf: pyo3::PyRef<'_, Self>, py: pyo3::Python) -> String { let inner = slf.0.get(); let shared = inner.shared(); diff --git a/src/pyclasses/lfucache.rs b/src/pyclasses/lfucache.rs index 8423819..00cb001 100644 --- a/src/pyclasses/lfucache.rs +++ b/src/pyclasses/lfucache.rs @@ 
-593,6 +593,17 @@ impl PyLFUCache { self.copy(py) } + fn __getstate__(&self, py: pyo3::Python) -> pyo3::PyResult { + let inner = self.0.get(); + inner.build_pickle(py).map(|x| x.into()) + } + + fn __setstate__(&self, py: pyo3::Python, state: alias::PyObject) -> pyo3::PyResult<()> { + let wrapped = Wrapped::from_pickle(py, state)?; + self.0.set(wrapped); + Ok(()) + } + fn __repr__(slf: pyo3::PyRef<'_, Self>, py: pyo3::Python) -> String { let inner = slf.0.get(); let shared = inner.shared(); diff --git a/src/pyclasses/lrucache.rs b/src/pyclasses/lrucache.rs index 11714b8..46471b1 100644 --- a/src/pyclasses/lrucache.rs +++ b/src/pyclasses/lrucache.rs @@ -581,6 +581,17 @@ impl PyLRUCache { self.copy(py) } + fn __getstate__(&self, py: pyo3::Python) -> pyo3::PyResult { + let inner = self.0.get(); + inner.build_pickle(py).map(|x| x.into()) + } + + fn __setstate__(&self, py: pyo3::Python, state: alias::PyObject) -> pyo3::PyResult<()> { + let wrapped = Wrapped::from_pickle(py, state)?; + self.0.set(wrapped); + Ok(()) + } + fn __repr__(slf: pyo3::PyRef<'_, Self>, py: pyo3::Python) -> String { let inner = slf.0.get(); let shared = inner.shared(); diff --git a/src/pyclasses/rrcache.rs b/src/pyclasses/rrcache.rs index 59af05c..a7fe47d 100644 --- a/src/pyclasses/rrcache.rs +++ b/src/pyclasses/rrcache.rs @@ -554,6 +554,17 @@ impl PyRRCache { self.copy(py) } + fn __getstate__(&self, py: pyo3::Python) -> pyo3::PyResult { + let inner = self.0.get(); + inner.build_pickle(py).map(|x| x.into()) + } + + fn __setstate__(&self, py: pyo3::Python, state: alias::PyObject) -> pyo3::PyResult<()> { + let wrapped = Wrapped::from_pickle(py, state)?; + self.0.set(wrapped); + Ok(()) + } + fn __repr__(slf: pyo3::PyRef<'_, Self>, py: pyo3::Python) -> String { let inner = slf.0.get(); let shared = inner.shared(); diff --git a/src/pyclasses/ttlcache.rs b/src/pyclasses/ttlcache.rs index b909c58..e51c44e 100644 --- a/src/pyclasses/ttlcache.rs +++ b/src/pyclasses/ttlcache.rs @@ -568,6 +568,17 @@ impl 
PyTTLCache { self.copy(py) } + fn __getstate__(&self, py: pyo3::Python) -> pyo3::PyResult { + let inner = self.0.get(); + inner.build_pickle(py).map(|x| x.into()) + } + + fn __setstate__(&self, py: pyo3::Python, state: alias::PyObject) -> pyo3::PyResult<()> { + let wrapped = Wrapped::from_pickle(py, state)?; + self.0.set(wrapped); + Ok(()) + } + fn __repr__(slf: pyo3::PyRef<'_, Self>, py: pyo3::Python) -> String { let inner = slf.0.get(); let shared = inner.shared(); diff --git a/src/pyclasses/vttlcache.rs b/src/pyclasses/vttlcache.rs index ac90b4d..df01150 100644 --- a/src/pyclasses/vttlcache.rs +++ b/src/pyclasses/vttlcache.rs @@ -544,6 +544,17 @@ impl PyVTTLCache { self.copy(py) } + fn __getstate__(&self, py: pyo3::Python) -> pyo3::PyResult { + let inner = self.0.get(); + inner.build_pickle(py).map(|x| x.into()) + } + + fn __setstate__(&self, py: pyo3::Python, state: alias::PyObject) -> pyo3::PyResult<()> { + let wrapped = Wrapped::from_pickle(py, state)?; + self.0.set(wrapped); + Ok(()) + } + fn __repr__(slf: pyo3::PyRef<'_, Self>, py: pyo3::Python) -> String { let inner = slf.0.get(); let shared = inner.shared(); diff --git a/tests/mixins.py b/tests/mixins.py index 673e139..63a726f 100644 --- a/tests/mixins.py +++ b/tests/mixins.py @@ -1,4 +1,6 @@ +import copy as stdcopy import dataclasses +import pickle import sys import threading import time @@ -505,6 +507,69 @@ def test_copy_preserves_maxsize(self): c2 = cache.copy() assert c2.maxsize == cache.maxsize + def test_copy_is_shallow(self): + cache = self.create_cache() + + cache["A"] = [1, 2] + cache["B"] = {1: 1, 2: 2} + + c2 = cache.copy() + + assert len(cache["A"]) == 2 + assert len(cache["B"]) == 2 + assert len(c2["A"]) == 2 + assert len(c2["B"]) == 2 + + c2["A"].append(3) + c2["B"][3] = 3 + + assert len(cache["A"]) == 3 + assert len(cache["B"]) == 3 + assert len(c2["A"]) == 3 + assert len(c2["B"]) == 3 + + def test_deepcopy_has_same_items(self): + cache = self.create_cache() + + cache.update({"a": 1, "b": 
2}) + c2 = stdcopy.deepcopy(cache) + assert set(c2.items()) == set(cache.items()) + + def test_deepcopy_is_independent(self): + cache = self.create_cache() + + cache.insert("a", 1) + c2 = stdcopy.deepcopy(cache) + c2.insert("b", 2) + assert not cache.contains("b") + + def test_deepcopy_preserves_maxsize(self): + cache = self.create_cache() + + c2 = stdcopy.deepcopy(cache) + assert c2.maxsize == cache.maxsize + + def test_deepcopy_is_not_shallow(self): + cache = self.create_cache() + + cache["A"] = [1, 2] + cache["B"] = {1: 1, 2: 2} + + c2 = stdcopy.deepcopy(cache) + + assert len(cache["A"]) == 2 + assert len(cache["B"]) == 2 + assert len(c2["A"]) == 2 + assert len(c2["B"]) == 2 + + c2["A"].append(3) + c2["B"][3] = 3 + + assert len(cache["A"]) == 2 + assert len(cache["B"]) == 2 + assert len(c2["A"]) == 3 + assert len(c2["B"]) == 3 + @dataclasses.dataclass class Sized: @@ -1002,6 +1067,32 @@ def test_fuzzy_chain_methods(self, key, value): assert c[key] == value assert c.pop(key) == value + @given(pairs=st.lists(st.tuples(hashable_keys, any_value), max_size=20)) + def test_fuzzy_getstate_setstate(self, pairs): + original = self.create_cache(20, pairs) + + if not hasattr(original, "__setstate__"): + pytest.skip(f"{original.__class__} doesn't support __setstate__") + + state = original.__getstate__() + + pickled = original.__class__.__new__(original.__class__) + pickled.__setstate__(state) + + assert pickled == original + + @given(pairs=st.lists(st.tuples(hashable_keys, any_value), max_size=20)) + def test_fuzzy_pickle_loads_dumps(self, pairs): + original = self.create_cache(20, pairs) + + if not hasattr(original, "__setstate__"): + pytest.skip(f"{original.__class__} doesn't support pickle") + + state = pickle.dumps(original) + pickled = pickle.loads(state) + + assert pickled == original + class BenchmarkMixin(BaseMixin): @pytest.fixture(autouse=True) From 412f60866f615e463e4e277a08980feee1f16cda Mon Sep 17 00:00:00 2001 From: awolverp Date: Fri, 29 May 2026 12:42:45 
+0330 Subject: [PATCH 33/60] Fix Cache/RRCache pickle loads & Support pickle for FIFOCache. - Refactor internal pickle helper - Support __getstate__ & __setstate__ for FIFOCache - Fix Cache/RRCache.__setstate__ --- src/internal/pickle.rs | 622 +++++++++++++++++-------------------- src/policies/fifopolicy.rs | 61 +++- src/policies/lfupolicy.rs | 19 +- src/policies/lrupolicy.rs | 20 +- src/policies/nopolicy.rs | 22 +- src/policies/rrpolicy.rs | 22 +- src/policies/traits.rs | 13 +- src/policies/ttlpolicy.rs | 19 +- src/policies/vttlpolicy.rs | 19 +- src/policies/wrapped.rs | 14 +- tests/mixins.py | 4 + 11 files changed, 397 insertions(+), 438 deletions(-) diff --git a/src/internal/pickle.rs b/src/internal/pickle.rs index 2a92842..2864bc1 100644 --- a/src/internal/pickle.rs +++ b/src/internal/pickle.rs @@ -79,7 +79,7 @@ impl<'a> PyPickleVal<'a> { /// /// # Safety /// The caller is responsible for exactly one `Py_DECREF` (or transferring ownership to a container). - unsafe fn into_py_raw(self, py: pyo3::Python<'_>) -> pyo3::PyResult<*mut pyo3::ffi::PyObject> { + unsafe fn into_raw(self, py: pyo3::Python<'_>) -> pyo3::PyResult<*mut pyo3::ffi::PyObject> { let ptr = match self { Self::Owned(v) => v.into_ptr(), Self::Borrowed(v) => { @@ -128,8 +128,8 @@ pub struct Pickle(alias::PyObject); impl Pickle { /// Begin building a top-level pickle tuple with exactly `size` slots. #[inline] - pub fn builder(py: pyo3::Python<'_>, size: usize) -> pyo3::PyResult { - PickleBuilder::new(py, size as isize) + pub fn builder<'py>(py: pyo3::Python<'py>, size: usize) -> pyo3::PyResult> { + PickleBuilder::new(py, size) } /// Borrow the inner [`alias::PyObject`] without consuming `self`. @@ -162,107 +162,38 @@ impl From for alias::PyObject { } } -/// Most of builders expose the same `push` / `push_tuple` / `push_list` / `push_dict` surface. -/// Rather than repeating three times, generate them with a macro. 
-/// -/// Each builder must provide a method: -/// ```ignore -/// unsafe fn push_owned_impl( -/// &mut self, -/// py: pyo3::Python<'_>, -/// item: *mut pyo3::ffi::PyObject, // caller hands over ownership -/// ) -> pyo3::PyResult<()> -/// ``` -macro_rules! impl_push_methods { - ($ty:ident) => { - impl $ty { - /// Push a scalar [`PyPickleVal`] (or anything that converts `Into`). - /// - /// ```rust,ignore - /// builder.push(py, 42isize)? - /// .push(py, "hello")? - /// .push(py, 3.14f64)?; - /// ``` - pub fn push<'a, V>(&mut self, py: pyo3::Python<'_>, val: V) -> pyo3::PyResult<&mut Self> - where - V: Into>, - { - let raw = unsafe { val.into().into_py_raw(py)? }; - unsafe { - self.push_owned_impl(py, raw)?; - } - Ok(self) - } - - /// Push a nested tuple whose items are filled by the closure `f`. - /// - /// `size` must equal the exact number of items `f` will push. - /// - /// ```rust,ignore - /// builder.push_tuple(py, 2, |t| { - /// t.push(py, 3isize)?.push(py, 4isize)?; - /// Ok(()) - /// })?; - /// ``` - pub fn push_tuple( - &mut self, - py: pyo3::Python<'_>, - size: isize, - f: F, - ) -> pyo3::PyResult<&mut Self> - where - F: FnOnce(&mut TupleBuilder) -> pyo3::PyResult<()>, - { - let mut b = TupleBuilder::new(py, size)?; - f(&mut b)?; - // into_raw transfers ownership; Drop becomes a no-op. - unsafe { - self.push_owned_impl(py, b.into_raw())?; - } - Ok(self) - } +mod sealed { + /// Accepts a single raw owned pointer from a finished child builder. + pub trait Receive { + /// # Safety + /// `item` must have refcount == 1; ownership is fully transferred. + unsafe fn receive(&mut self, item: *mut pyo3::ffi::PyObject) -> pyo3::PyResult<()>; + } +} - /// Push a nested list whose items are filled by the closure `f`. 
- /// - /// ```rust,ignore - /// builder.push_list(py, |l| { - /// l.push(py, 1isize)?.push(py, "A")?; - /// Ok(()) - /// })?; - /// ``` - pub fn push_list(&mut self, py: pyo3::Python<'_>, f: F) -> pyo3::PyResult<&mut Self> - where - F: FnOnce(&mut ListBuilder) -> pyo3::PyResult<()>, - { - let mut b = ListBuilder::new(py)?; - f(&mut b)?; - unsafe { - self.push_owned_impl(py, b.into_raw())?; - } - Ok(self) - } +pub trait Builder: Sized + sealed::Receive { + fn py(&self) -> pyo3::Python<'_>; - /// Push a nested dict whose entries are filled by the closure `f`. - /// - /// ```rust,ignore - /// builder.push_dict(py, |d| { - /// d.entry(py, "key", 42isize)?; - /// Ok(()) - /// })?; - /// ``` - pub fn push_dict(&mut self, py: pyo3::Python<'_>, f: F) -> pyo3::PyResult<&mut Self> - where - F: FnOnce(&mut DictBuilder) -> pyo3::PyResult<()>, - { - let mut b = DictBuilder::new(py)?; - f(&mut b)?; - unsafe { - self.push_owned_impl(py, b.into_raw())?; - } - Ok(self) - } + fn push<'a, V: Into>>(&mut self, val: V) -> pyo3::PyResult<&mut Self> { + let raw = unsafe { val.into().into_raw(self.py())? }; + unsafe { + self.receive(raw)?; } - }; + + Ok(self) + } + + fn begin_tuple<'a>(&'a mut self, size: usize) -> pyo3::PyResult> { + TupleBuilder::new(self, size) + } + + fn begin_list<'a>(&'a mut self) -> pyo3::PyResult> { + ListBuilder::new(self) + } + + fn begin_dict<'a>(&'a mut self) -> pyo3::PyResult> { + DictBuilder::new(self) + } } /// Builds the top-level Python tuple that represents a pickle state. @@ -273,36 +204,41 @@ macro_rules! impl_push_methods { /// /// If the builder is dropped before `finish` is called, the partially-built /// tuple is correctly decreffed and all already-inserted items are released. -pub struct PickleBuilder { - /// `None` only after `finish()` has transferred ownership. 
+pub struct PickleBuilder<'py> { + py: pyo3::Python<'py>, inner: Option>, size: isize, current: isize, } -// private methods -impl PickleBuilder { - fn new(py: pyo3::Python<'_>, size: isize) -> pyo3::PyResult { - let raw = unsafe { pyo3::ffi::PyTuple_New(size) }; +impl<'py> PickleBuilder<'py> { + fn new(py: pyo3::Python<'py>, size: usize) -> pyo3::PyResult { + let raw = unsafe { pyo3::ffi::PyTuple_New(size as isize) }; if raw.is_null() { - Err(pyo3::PyErr::fetch(py)) - } else { - Ok(Self { - inner: Some(unsafe { ptr::NonNull::new_unchecked(raw) }), - size, - current: 0, - }) + return Err(pyo3::PyErr::fetch(py)); } + Ok(Self { + py, + inner: Some(unsafe { ptr::NonNull::new_unchecked(raw) }), + size: size as isize, + current: 0, + }) } - /// # Safety - /// `PyTuple_SetItem` **steals** `item` on success and **decrefs** it on - /// failure, so this function must not touch `item`'s refcount after the call. - unsafe fn push_owned_impl( - &mut self, - py: pyo3::Python<'_>, - item: *mut pyo3::ffi::PyObject, - ) -> pyo3::PyResult<()> { + pub fn finish(mut self) -> Pickle { + debug_assert_eq!( + self.current, + self.size, + "PickleBuilder::finish: {} unfilled slot(s)", + self.size - self.current + ); + let ptr = self.inner.take().expect("already consumed").as_ptr(); + Pickle(unsafe { pyo3::Bound::from_owned_ptr(self.py, ptr) }.unbind()) + } +} + +impl sealed::Receive for PickleBuilder<'_> { + unsafe fn receive(&mut self, item: *mut pyo3::ffi::PyObject) -> pyo3::PyResult<()> { debug_assert!( self.current < self.size, "PickleBuilder: pushed more items than `size`" @@ -310,39 +246,21 @@ impl PickleBuilder { let ptr = self.inner.expect("PickleBuilder already consumed").as_ptr(); if pyo3::ffi::PyTuple_SetItem(ptr, self.current, item) != 0 { // item was already decreffed by PyTuple_SetItem on failure - return Err(pyo3::PyErr::fetch(py)); + return Err(pyo3::PyErr::fetch(self.py)); } self.current += 1; Ok(()) } } -impl PickleBuilder { - /// Finalise the builder into a [`Pickle`]. 
- pub fn finish(mut self, py: pyo3::Python<'_>) -> Pickle { - debug_assert_eq!( - self.current, - self.size, - "PickleBuilder::finish called with {} unfilled slot(s)", - self.size - self.current, - ); - - // Take ownership - // `.take()` makes Drop no-op - let ptr = self - .inner - .take() - .expect("PickleBuilder already consumed") - .as_ptr(); - - let bound = unsafe { pyo3::Bound::from_owned_ptr(py, ptr) }; - Pickle(bound.unbind()) +impl<'py> Builder for PickleBuilder<'py> { + #[inline] + fn py(&self) -> pyo3::Python<'py> { + self.py } } -impl_push_methods!(PickleBuilder); - -impl Drop for PickleBuilder { +impl Drop for PickleBuilder<'_> { fn drop(&mut self) { // Releases the tuple and all items already inserted into it. if let Some(nn) = self.inner.take() { @@ -353,62 +271,68 @@ impl Drop for PickleBuilder { } } -/// Builds a Python tuple for embedding inside another container. -pub struct TupleBuilder { +pub struct TupleBuilder<'a, P: Builder> { + parent: &'a mut P, inner: Option>, size: isize, current: isize, } -// private methods -impl TupleBuilder { - /// Consume the builder and surrender ownership of the raw pointer to the - /// caller (used internally to insert into a parent container). - fn into_raw(mut self) -> *mut pyo3::ffi::PyObject { - // Drop becomes a no-op because `inner` is now None. 
- self.inner - .take() - .expect("TupleBuilder already consumed") - .as_ptr() - } - - unsafe fn push_owned_impl( - &mut self, - py: pyo3::Python<'_>, - item: *mut pyo3::ffi::PyObject, - ) -> pyo3::PyResult<()> { - debug_assert!( - self.current < self.size, - "TupleBuilder: pushed more items than `size`" +impl<'a, P: Builder> TupleBuilder<'a, P> { + fn new(parent: &'a mut P, size: usize) -> pyo3::PyResult { + let raw = unsafe { pyo3::ffi::PyTuple_New(size as isize) }; + if raw.is_null() { + return Err(pyo3::PyErr::fetch(parent.py())); + } + + Ok(Self { + parent, + inner: Some(unsafe { ptr::NonNull::new_unchecked(raw) }), + size: size as isize, + current: 0, + }) + } + + #[inline] + pub fn end(mut self) -> pyo3::PyResult<()> { + debug_assert_eq!( + self.current, + self.size, + "TupleBuilder::end: {} unfilled slot(s)", + self.size - self.current ); - let ptr = self.inner.expect("TupleBuilder already consumed").as_ptr(); - if pyo3::ffi::PyTuple_SetItem(ptr, self.current, item) != 0 { - return Err(pyo3::PyErr::fetch(py)); + let item = self.inner.take().expect("already consumed").as_ptr(); + unsafe { + self.parent.receive(item)?; } - self.current += 1; Ok(()) } } -impl TupleBuilder { - /// Allocate a new tuple with `size` pre-allocated slots. 
- pub fn new(py: pyo3::Python<'_>, size: isize) -> pyo3::PyResult { - let raw = unsafe { pyo3::ffi::PyTuple_New(size) }; - if raw.is_null() { - Err(pyo3::PyErr::fetch(py)) - } else { - Ok(Self { - inner: Some(unsafe { ptr::NonNull::new_unchecked(raw) }), - size, - current: 0, - }) +impl sealed::Receive for TupleBuilder<'_, P> { + unsafe fn receive(&mut self, item: *mut pyo3::ffi::PyObject) -> pyo3::PyResult<()> { + debug_assert!(self.current < self.size, "TupleBuilder: too many items"); + if pyo3::ffi::PyTuple_SetItem( + self.inner.expect("already consumed").as_ptr(), + self.current, + item, + ) != 0 + { + return Err(pyo3::PyErr::fetch(self.parent.py())); } + self.current += 1; + Ok(()) } } -impl_push_methods!(TupleBuilder); +impl Builder for TupleBuilder<'_, P> { + #[inline] + fn py(&self) -> pyo3::Python<'_> { + self.parent.py() + } +} -impl Drop for TupleBuilder { +impl Drop for TupleBuilder<'_, P> { fn drop(&mut self) { if let Some(nn) = self.inner.take() { unsafe { @@ -418,59 +342,53 @@ impl Drop for TupleBuilder { } } -/// Builds a Python list of arbitrary length. -/// -/// Unlike [`TupleBuilder`], no size is required upfront; items are appended -/// one by one via [`PyList_Append`]. -pub struct ListBuilder { - /// `None` only after `into_raw()` or `build()`. +pub struct ListBuilder<'a, P: Builder> { + parent: &'a mut P, inner: Option>, } -// private methods -impl ListBuilder { - fn into_raw(mut self) -> *mut pyo3::ffi::PyObject { - self.inner - .take() - .expect("ListBuilder already consumed") - .as_ptr() +impl<'a, P: Builder> ListBuilder<'a, P> { + fn new(parent: &'a mut P) -> pyo3::PyResult { + let raw = unsafe { pyo3::ffi::PyList_New(0) }; + if raw.is_null() { + return Err(pyo3::PyErr::fetch(parent.py())); + } + Ok(Self { + parent, + inner: Some(unsafe { ptr::NonNull::new_unchecked(raw) }), + }) } - /// # Safety - /// `PyList_Append` does **not** steal `item`; it increments `item`'s refcount - /// on success. 
We therefore always decref our owned ref after the call, - /// regardless of success or failure. - unsafe fn push_owned_impl( - &mut self, - py: pyo3::Python<'_>, - item: *mut pyo3::ffi::PyObject, - ) -> pyo3::PyResult<()> { - let ptr = self.inner.expect("ListBuilder already consumed").as_ptr(); - let result = pyo3::ffi::PyList_Append(ptr, item); - pyo3::ffi::Py_DECREF(item); // release our owned ref in all cases - if result != 0 { - return Err(pyo3::PyErr::fetch(py)); + #[inline] + pub fn end(mut self) -> pyo3::PyResult<()> { + let item = self.inner.take().expect("already consumed").as_ptr(); + unsafe { + self.parent.receive(item)?; } Ok(()) } } -impl ListBuilder { - /// Create a new, empty list. - pub fn new(py: pyo3::Python<'_>) -> pyo3::PyResult { - let raw = unsafe { pyo3::ffi::PyList_New(0) }; - if raw.is_null() { - return Err(pyo3::PyErr::fetch(py)); +impl sealed::Receive for ListBuilder<'_, P> { + unsafe fn receive(&mut self, item: *mut pyo3::ffi::PyObject) -> pyo3::PyResult<()> { + let rc = pyo3::ffi::PyList_Append(self.inner.expect("already consumed").as_ptr(), item); + pyo3::ffi::Py_DECREF(item); // PyList_Append does not steal + if rc != 0 { + Err(pyo3::PyErr::fetch(self.parent.py())) + } else { + Ok(()) } - Ok(Self { - inner: Some(unsafe { ptr::NonNull::new_unchecked(raw) }), - }) } } -impl_push_methods!(ListBuilder); +impl Builder for ListBuilder<'_, P> { + #[inline] + fn py(&self) -> pyo3::Python<'_> { + self.parent.py() + } +} -impl Drop for ListBuilder { +impl Drop for ListBuilder<'_, P> { fn drop(&mut self) { if let Some(nn) = self.inner.take() { unsafe { @@ -480,172 +398,214 @@ impl Drop for ListBuilder { } } -/// Builds a Python dict. -/// -/// Keys must be [`PyPickleVal`] scalars (integers, floats, bools, strings, `None`). -/// Values may be scalars **or** nested containers built via the `entry_tuple`, -/// `entry_list`, and `entry_dict` methods. 
-pub struct DictBuilder { +pub struct DictBuilder<'a, P: Builder> { + parent: &'a mut P, inner: Option>, } -// private methods -impl DictBuilder { - fn into_raw(mut self) -> *mut pyo3::ffi::PyObject { - self.inner - .take() - .expect("DictBuilder already consumed") - .as_ptr() - } - - /// # Safety - /// `PyDict_SetItem` does **not** steal either `key` or `val`. - /// This helper takes ownership of both and decrefs them unconditionally. - unsafe fn set_kv( - &mut self, - py: pyo3::Python<'_>, - key: *mut pyo3::ffi::PyObject, - val: *mut pyo3::ffi::PyObject, - ) -> pyo3::PyResult<()> { - let ptr = self.inner.expect("DictBuilder already consumed").as_ptr(); - let result = pyo3::ffi::PyDict_SetItem(ptr, key, val); - // Always release our owned refs regardless of success/failure. - pyo3::ffi::Py_DECREF(key); - pyo3::ffi::Py_DECREF(val); - if result != 0 { - Err(pyo3::PyErr::fetch(py)) - } else { - Ok(()) - } - } -} - -impl DictBuilder { - /// Create a new, empty dict. - pub fn new(py: pyo3::Python<'_>) -> pyo3::PyResult { +impl<'a, P: Builder> DictBuilder<'a, P> { + fn new(parent: &'a mut P) -> pyo3::PyResult { let raw = unsafe { pyo3::ffi::PyDict_New() }; if raw.is_null() { - return Err(pyo3::PyErr::fetch(py)); + return Err(pyo3::PyErr::fetch(parent.py())); } Ok(Self { + parent, inner: Some(unsafe { ptr::NonNull::new_unchecked(raw) }), }) } - /// Insert `key → val` where both are [`PyPickleVal`] scalars. - pub fn entry<'k, 'v, K, V>( - &mut self, - py: pyo3::Python<'_>, - key: K, - val: V, - ) -> pyo3::PyResult<&mut Self> + pub fn entry<'k, 'v, K, V>(&mut self, key: K, val: V) -> pyo3::PyResult<&mut Self> where K: Into>, V: Into>, { - unsafe { - let kptr = key.into().into_py_raw(py)?; - let vptr = match val.into().into_py_raw(py) { + let kptr = unsafe { key.into().into_raw(self.parent.py())? 
}; + let vptr = unsafe { + match val.into().into_raw(self.parent.py()) { Ok(v) => v, Err(e) => { - pyo3::ffi::Py_DECREF(kptr); // clean up key we already allocated + pyo3::ffi::Py_DECREF(kptr); return Err(e); } - }; - self.set_kv(py, kptr, vptr)?; + } + }; + unsafe { + self.set_kv(kptr, vptr)?; } Ok(self) } - /// Insert `key → (nested tuple)`. - pub fn entry_tuple<'k, K, F>( + #[inline] + pub fn end(mut self) -> pyo3::PyResult<()> { + let item = self.inner.take().expect("already consumed").as_ptr(); + unsafe { + self.parent.receive(item)?; + } + Ok(()) + } + + unsafe fn set_kv( &mut self, - py: pyo3::Python<'_>, - key: K, - size: isize, - f: F, - ) -> pyo3::PyResult<&mut Self> + key: *mut pyo3::ffi::PyObject, + val: *mut pyo3::ffi::PyObject, + ) -> pyo3::PyResult<()> { + let rc = + pyo3::ffi::PyDict_SetItem(self.inner.expect("already consumed").as_ptr(), key, val); + pyo3::ffi::Py_DECREF(key); + pyo3::ffi::Py_DECREF(val); + if rc != 0 { + Err(pyo3::PyErr::fetch(self.parent.py())) + } else { + Ok(()) + } + } +} + +// DictBuilder also implements Builder so that begin_tuple/list/dict work +// as value-builders inside a dict value context. 
+impl sealed::Receive for DictBuilder<'_, P> { + #[inline] + unsafe fn receive(&mut self, item: *mut pyo3::ffi::PyObject) -> pyo3::PyResult<()> { + pyo3::ffi::Py_DECREF(item); + Err(pyo3::exceptions::PyTypeError::new_err( + "use entry() or entry_*() to insert into a DictBuilder", + )) + } +} + +impl Builder for DictBuilder<'_, P> { + #[inline] + fn py(&self) -> pyo3::Python<'_> { + self.parent.py() + } +} + +impl Drop for DictBuilder<'_, P> { + fn drop(&mut self) { + if let Some(nn) = self.inner.take() { + unsafe { + pyo3::ffi::Py_DECREF(nn.as_ptr()); + } + } + } +} + +impl<'a, P: Builder> DictBuilder<'a, P> { + pub fn entry_tuple<'k, K, F>(&mut self, key: K, size: usize, f: F) -> pyo3::PyResult<&mut Self> where K: Into>, - F: FnOnce(&mut TupleBuilder) -> pyo3::PyResult<()>, + F: FnOnce(&mut TupleBuilder) -> pyo3::PyResult<()>, { - let mut b = TupleBuilder::new(py, size)?; - f(&mut b)?; - let vptr = b.into_raw(); // transfer ownership out of TupleBuilder - unsafe { - let kptr = match key.into().into_py_raw(py) { + let mut sink = Sink( + // SAFETY: the GIL is held for the entire lifetime of this builder because + // the root PickleBuilder<'py> (which does own the 'py borrow) is kept alive + // as our `parent`. + unsafe { std::mem::transmute(self.parent.py()) }, + ); + + let vptr = { + let mut b = TupleBuilder::new(&mut sink, size)?; + f(&mut b)?; + b.inner.take().expect("already consumed").as_ptr() + }; + + let kptr = unsafe { + match key.into().into_raw(self.parent.py()) { Ok(k) => k, Err(e) => { - pyo3::ffi::Py_DECREF(vptr); // release value we built + pyo3::ffi::Py_DECREF(vptr); return Err(e); } - }; - self.set_kv(py, kptr, vptr)?; + } + }; + + unsafe { + self.set_kv(kptr, vptr)?; } Ok(self) } - /// Insert `key → [nested list]`. 
- pub fn entry_list<'k, K, F>( - &mut self, - py: pyo3::Python<'_>, - key: K, - f: F, - ) -> pyo3::PyResult<&mut Self> + pub fn entry_list<'k, K, F>(&mut self, key: K, f: F) -> pyo3::PyResult<&mut Self> where K: Into>, - F: FnOnce(&mut ListBuilder) -> pyo3::PyResult<()>, + F: FnOnce(&mut ListBuilder) -> pyo3::PyResult<()>, { - let mut b = ListBuilder::new(py)?; - f(&mut b)?; - let vptr = b.into_raw(); - unsafe { - let kptr = match key.into().into_py_raw(py) { + let mut sink = Sink( + // SAFETY: the GIL is held for the entire lifetime of this builder because + // the root PickleBuilder<'py> (which does own the 'py borrow) is kept alive + // as our `parent`. + unsafe { std::mem::transmute(self.parent.py()) }, + ); + + let vptr = { + let mut b = ListBuilder::new(&mut sink)?; + f(&mut b)?; + b.inner.take().expect("already consumed").as_ptr() + }; + let kptr = unsafe { + match key.into().into_raw(self.parent.py()) { Ok(k) => k, Err(e) => { pyo3::ffi::Py_DECREF(vptr); return Err(e); } - }; - self.set_kv(py, kptr, vptr)?; + } + }; + unsafe { + self.set_kv(kptr, vptr)?; } Ok(self) } - /// Insert `key → {nested dict}`. - pub fn entry_dict<'k, K, F>( - &mut self, - py: pyo3::Python<'_>, - key: K, - f: F, - ) -> pyo3::PyResult<&mut Self> + pub fn entry_dict<'k, K, F>(&mut self, key: K, f: F) -> pyo3::PyResult<&mut Self> where K: Into>, - F: FnOnce(&mut DictBuilder) -> pyo3::PyResult<()>, + F: FnOnce(&mut DictBuilder) -> pyo3::PyResult<()>, { - let mut b = DictBuilder::new(py)?; - f(&mut b)?; - let vptr = b.into_raw(); - unsafe { - let kptr = match key.into().into_py_raw(py) { + let mut sink = Sink( + // SAFETY: the GIL is held for the entire lifetime of this builder because + // the root PickleBuilder<'py> (which does own the 'py borrow) is kept alive + // as our `parent`. 
+ unsafe { std::mem::transmute(self.parent.py()) }, + ); + + let vptr = { + let mut b = DictBuilder::new(&mut sink)?; + f(&mut b)?; + b.inner.take().expect("already consumed").as_ptr() + }; + let kptr = unsafe { + match key.into().into_raw(self.parent.py()) { Ok(k) => k, Err(e) => { pyo3::ffi::Py_DECREF(vptr); return Err(e); } - }; - self.set_kv(py, kptr, vptr)?; + } + }; + unsafe { + self.set_kv(kptr, vptr)?; } Ok(self) } } -impl Drop for DictBuilder { - fn drop(&mut self) { - if let Some(nn) = self.inner.take() { - unsafe { - pyo3::ffi::Py_DECREF(nn.as_ptr()); - } - } +/// A parent that simply discards the pointer it receives. +/// Used only inside `entry_*` closures where the container +/// extracts the raw pointer directly before `end()` is called. +pub struct Sink(pyo3::Python<'static>); + +impl sealed::Receive for Sink { + unsafe fn receive(&mut self, item: *mut pyo3::ffi::PyObject) -> pyo3::PyResult<()> { + pyo3::ffi::Py_DECREF(item); + Ok(()) + } +} + +impl Builder for Sink { + #[inline] + fn py(&self) -> pyo3::Python<'_> { + self.0 } } diff --git a/src/policies/fifopolicy.rs b/src/policies/fifopolicy.rs index 4e8fd62..9622f30 100644 --- a/src/policies/fifopolicy.rs +++ b/src/policies/fifopolicy.rs @@ -1,7 +1,10 @@ use std::collections::VecDeque; +use pyo3::types::PyAnyMethods; + use crate::hashbrown; use crate::internal::alias; +use crate::internal::pickle::Builder; use crate::internal::utils; use crate::policies::traits; use crate::policies::traits::HandleExt; @@ -243,7 +246,7 @@ impl PolicyExt for FIFOPolicy { where Self: 'a; - const PICKLE_SIZE: isize = 2; + const PICKLE_SIZE: usize = 1; #[inline] fn current_size(&self) -> usize { @@ -396,18 +399,62 @@ impl PolicyExt for FIFOPolicy { fn build_pickle( &self, - py: pyo3::Python, - tuple: &mut crate::internal::pickle::TupleBuilder, + tuple: &mut crate::internal::pickle::TupleBuilder< + '_, + crate::internal::pickle::PickleBuilder, + >, ) -> pyo3::PyResult<()> { - todo!() + let mut list = 
tuple.begin_list()?; + + for handle in self.entries.iter() { + let mut tuple = list.begin_tuple(2)?; + tuple.push(handle.key().as_ref())?; + tuple.push(handle.value())?; + tuple.end()?; + } + + list.end() } fn from_pickle( maxsize: usize, - getsizeof: Option, - global_ttl: Option, + getsizeof: Option, + _global_ttl: Option, builded: pyo3::Bound<'_, pyo3::types::PyTuple>, ) -> pyo3::PyResult<(Self::Shared, Self)> { - todo!() + use pyo3::types::PyListMethods; + use pyo3::types::PyTupleMethods; + + let list = builded.get_item(0)?.cast_into::()?; + let list_length = list.len(); + + if list_length > maxsize { + return Err(new_py_error!( + PyValueError, + "list size is incompatible with maxsize" + )); + } + + let shared = Shared::new(maxsize, getsizeof); + let mut slf = Self::new(list.len()); + + for bound in list.iter() { + let (key, value) = bound.extract::<(alias::PyObject, alias::PyObject)>()?; + + let handle = Handle::new(bound.py(), shared.getsizeof(), key, value)?; + + slf.currsize = slf.currsize.saturating_add(handle.size()); + + unsafe { + slf.table.insert_no_grow( + handle.key().hash(), + // Adding `slf.front_offset` is unnecessary here + slf.entries.len(), + ); + } + slf.entries.push_back(handle); + } + + Ok((shared, slf)) } } diff --git a/src/policies/lfupolicy.rs b/src/policies/lfupolicy.rs index 804d619..a5ba950 100644 --- a/src/policies/lfupolicy.rs +++ b/src/policies/lfupolicy.rs @@ -307,7 +307,7 @@ impl PolicyExt for LFUPolicy { where Self: 'a; - const PICKLE_SIZE: isize = 1; + const PICKLE_SIZE: usize = 1; #[inline] fn current_size(&self) -> usize { @@ -472,21 +472,4 @@ impl PolicyExt for LFUPolicy { currsize: self.currsize, } } - - fn build_pickle( - &self, - py: pyo3::Python, - tuple: &mut crate::internal::pickle::TupleBuilder, - ) -> pyo3::PyResult<()> { - todo!() - } - - fn from_pickle( - maxsize: usize, - getsizeof: Option, - global_ttl: Option, - builded: pyo3::Bound<'_, pyo3::types::PyTuple>, - ) -> pyo3::PyResult<(Self::Shared, Self)> { - 
todo!() - } } diff --git a/src/policies/lrupolicy.rs b/src/policies/lrupolicy.rs index 989a4ad..5b49be1 100644 --- a/src/policies/lrupolicy.rs +++ b/src/policies/lrupolicy.rs @@ -1,5 +1,4 @@ use crate::hashbrown; -use crate::internal::alias; use crate::internal::linked_list; use crate::internal::utils; use crate::policies::traits; @@ -156,7 +155,7 @@ impl PolicyExt for LRUPolicy { where Self: 'a; - const PICKLE_SIZE: isize = 1; + const PICKLE_SIZE: usize = 1; #[inline] fn current_size(&self) -> usize { @@ -313,21 +312,4 @@ impl PolicyExt for LRUPolicy { currsize: self.currsize, } } - - fn build_pickle( - &self, - py: pyo3::Python, - tuple: &mut crate::internal::pickle::TupleBuilder, - ) -> pyo3::PyResult<()> { - todo!() - } - - fn from_pickle( - maxsize: usize, - getsizeof: Option, - global_ttl: Option, - builded: pyo3::Bound<'_, pyo3::types::PyTuple>, - ) -> pyo3::PyResult<(Self::Shared, Self)> { - todo!() - } } diff --git a/src/policies/nopolicy.rs b/src/policies/nopolicy.rs index cd8319a..585878f 100644 --- a/src/policies/nopolicy.rs +++ b/src/policies/nopolicy.rs @@ -1,5 +1,6 @@ use crate::hashbrown; use crate::internal::alias; +use crate::internal::pickle::Builder; use crate::internal::utils; use crate::policies::traits; use crate::policies::traits::HandleExt; @@ -121,7 +122,7 @@ impl traits::PolicyExt for NoPolicy { where Self: 'a; - const PICKLE_SIZE: isize = 1; + const PICKLE_SIZE: usize = 1; #[inline] fn current_size(&self) -> usize { @@ -244,16 +245,20 @@ impl traits::PolicyExt for NoPolicy { fn build_pickle( &self, - py: pyo3::Python, - tuple: &mut crate::internal::pickle::TupleBuilder, + tuple: &mut crate::internal::pickle::TupleBuilder< + '_, + crate::internal::pickle::PickleBuilder, + >, ) -> pyo3::PyResult<()> { - tuple.push_dict(py, |dict| unsafe { + let mut dict = tuple.begin_dict()?; + + unsafe { for handle in self.table.iter().map(|x| x.as_ref()) { - dict.entry(py, handle.key().as_ref(), handle.value())?; + dict.entry(handle.key().as_ref(), 
handle.value())?; } - Ok(()) - })?; - Ok(()) + } + + dict.end() } fn from_pickle( @@ -281,6 +286,7 @@ impl traits::PolicyExt for NoPolicy { for (key, value) in dict.iter() { let handle = Handle::new(key.py(), shared.getsizeof(), key.unbind(), value.unbind())?; + slf.currsize = slf.currsize.saturating_add(handle.size()); unsafe { slf.table.insert_no_grow(handle.key().hash(), handle); } diff --git a/src/policies/rrpolicy.rs b/src/policies/rrpolicy.rs index 2948561..7797641 100644 --- a/src/policies/rrpolicy.rs +++ b/src/policies/rrpolicy.rs @@ -1,5 +1,6 @@ use crate::hashbrown; use crate::internal::alias; +use crate::internal::pickle::Builder; use crate::internal::utils; use crate::policies::traits; use crate::policies::traits::HandleExt; @@ -123,7 +124,7 @@ impl PolicyExt for RRPolicy { where Self: 'a; - const PICKLE_SIZE: isize = 1; + const PICKLE_SIZE: usize = 1; #[inline] fn current_size(&self) -> usize { @@ -255,16 +256,20 @@ impl PolicyExt for RRPolicy { fn build_pickle( &self, - py: pyo3::Python, - tuple: &mut crate::internal::pickle::TupleBuilder, + tuple: &mut crate::internal::pickle::TupleBuilder< + '_, + crate::internal::pickle::PickleBuilder, + >, ) -> pyo3::PyResult<()> { - tuple.push_dict(py, |dict| unsafe { + let mut dict = tuple.begin_dict()?; + + unsafe { for handle in self.table.iter().map(|x| x.as_ref()) { - dict.entry(py, handle.key().as_ref(), handle.value())?; + dict.entry(handle.key().as_ref(), handle.value())?; } - Ok(()) - })?; - Ok(()) + } + + dict.end() } fn from_pickle( @@ -292,6 +297,7 @@ impl PolicyExt for RRPolicy { for (key, value) in dict.iter() { let handle = Handle::new(key.py(), shared.getsizeof(), key.unbind(), value.unbind())?; + slf.currsize = slf.currsize.saturating_add(handle.size()); unsafe { slf.table.insert_no_grow(handle.key().hash(), handle); } diff --git a/src/policies/traits.rs b/src/policies/traits.rs index a214042..adef42b 100644 --- a/src/policies/traits.rs +++ b/src/policies/traits.rs @@ -92,7 +92,7 @@ pub trait 
PolicyExt: Sized { where Self: 'a; - const PICKLE_SIZE: isize; + const PICKLE_SIZE: usize; /// Returns the current total cumulative size consumed by all stored entries. fn current_size(&self) -> usize; @@ -143,9 +143,10 @@ pub trait PolicyExt: Sized { /// Should not add items to pickle more than the configured [`Self::PICKLE_SIZE`]. fn build_pickle( &self, - py: pyo3::Python, - tuple: &mut pickle::TupleBuilder, - ) -> pyo3::PyResult<()>; + tuple: &mut pickle::TupleBuilder<'_, pickle::PickleBuilder>, + ) -> pyo3::PyResult<()> { + todo!() + } /// Loads the builded pickle. fn from_pickle( @@ -153,5 +154,7 @@ pub trait PolicyExt: Sized { getsizeof: Option, global_ttl: Option, builded: pyo3::Bound<'_, pyo3::types::PyTuple>, - ) -> pyo3::PyResult<(Self::Shared, Self)>; + ) -> pyo3::PyResult<(Self::Shared, Self)> { + todo!() + } } diff --git a/src/policies/ttlpolicy.rs b/src/policies/ttlpolicy.rs index c874b7c..c0b22ed 100644 --- a/src/policies/ttlpolicy.rs +++ b/src/policies/ttlpolicy.rs @@ -360,7 +360,7 @@ impl PolicyExt for TTLPolicy { = Vacant<'a> where Self: 'a; - const PICKLE_SIZE: isize = 1; + const PICKLE_SIZE: usize = 1; #[inline] fn current_size(&self) -> usize { @@ -524,21 +524,4 @@ impl PolicyExt for TTLPolicy { front_offset: self.front_offset, } } - - fn build_pickle( - &self, - py: pyo3::Python, - tuple: &mut crate::internal::pickle::TupleBuilder, - ) -> pyo3::PyResult<()> { - todo!() - } - - fn from_pickle( - maxsize: usize, - getsizeof: Option, - global_ttl: Option, - builded: pyo3::Bound<'_, pyo3::types::PyTuple>, - ) -> pyo3::PyResult<(Self::Shared, Self)> { - todo!() - } } diff --git a/src/policies/vttlpolicy.rs b/src/policies/vttlpolicy.rs index 4a4ea4f..99ce385 100644 --- a/src/policies/vttlpolicy.rs +++ b/src/policies/vttlpolicy.rs @@ -300,7 +300,7 @@ impl PolicyExt for VTTLPolicy { = Vacant<'a> where Self: 'a; - const PICKLE_SIZE: isize = 1; + const PICKLE_SIZE: usize = 1; #[inline] fn current_size(&self) -> usize { @@ -475,21 +475,4 @@ impl 
PolicyExt for VTTLPolicy { currsize: self.currsize, } } - - fn build_pickle( - &self, - py: pyo3::Python, - tuple: &mut crate::internal::pickle::TupleBuilder, - ) -> pyo3::PyResult<()> { - todo!() - } - - fn from_pickle( - maxsize: usize, - getsizeof: Option, - global_ttl: Option, - builded: pyo3::Bound<'_, pyo3::types::PyTuple>, - ) -> pyo3::PyResult<(Self::Shared, Self)> { - todo!() - } } diff --git a/src/policies/wrapped.rs b/src/policies/wrapped.rs index 86f79aa..3616578 100644 --- a/src/policies/wrapped.rs +++ b/src/policies/wrapped.rs @@ -3,6 +3,7 @@ use pyo3::types::PyTupleMethods; use crate::internal::alias; use crate::internal::pickle; +use crate::internal::pickle::Builder; use crate::policies::traits::HandleExt; use crate::policies::traits::OccupiedExt; use crate::policies::traits::PolicyEntry; @@ -262,14 +263,15 @@ impl Wrapped

{ let getsizeof: Option = self.shared.getsizeof().clone_ref(py).into(); builder - .push(py, self.shared.maxsize())? - .push(py, getsizeof)? - .push(py, self.shared.global_ttl())?; + .push(self.shared.maxsize())? + .push(getsizeof)? + .push(self.shared.global_ttl())?; - let policy = self.inner.lock(); - builder.push_tuple(py, P::PICKLE_SIZE, |tuple| policy.build_pickle(py, tuple))?; + let mut tuple = builder.begin_tuple(P::PICKLE_SIZE)?; + self.inner.lock().build_pickle(&mut tuple)?; + tuple.end()?; - Ok(builder.finish(py)) + Ok(builder.finish()) } } diff --git a/tests/mixins.py b/tests/mixins.py index 63a726f..8996250 100644 --- a/tests/mixins.py +++ b/tests/mixins.py @@ -506,6 +506,7 @@ def test_copy_preserves_maxsize(self): c2 = cache.copy() assert c2.maxsize == cache.maxsize + assert c2.current_size() == cache.current_size() def test_copy_is_shallow(self): cache = self.create_cache() @@ -548,6 +549,7 @@ def test_deepcopy_preserves_maxsize(self): c2 = stdcopy.deepcopy(cache) assert c2.maxsize == cache.maxsize + assert c2.current_size() == cache.current_size() def test_deepcopy_is_not_shallow(self): cache = self.create_cache() @@ -1079,6 +1081,7 @@ def test_fuzzy_getstate_setstate(self, pairs): pickled = original.__class__.__new__(original.__class__) pickled.__setstate__(state) + assert pickled.current_size() == original.current_size() assert pickled == original @given(pairs=st.lists(st.tuples(hashable_keys, any_value), max_size=20)) @@ -1091,6 +1094,7 @@ def test_fuzzy_pickle_loads_dumps(self, pairs): state = pickle.dumps(original) pickled = pickle.loads(state) + assert pickled.current_size() == original.current_size() assert pickled == original From 910a3441bf0d9233ec60fd5dd6e14d08be165d2a Mon Sep 17 00:00:00 2001 From: awolverp Date: Fri, 29 May 2026 12:43:34 +0330 Subject: [PATCH 34/60] Fix wording in doccomments --- src/internal/alias.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/internal/alias.rs b/src/internal/alias.rs index 
6457bb7..3694619 100644 --- a/src/internal/alias.rs +++ b/src/internal/alias.rs @@ -1,4 +1,4 @@ -//! There are type aliases that are used whole the library +//! There are type aliases that are used across the library /// Type alias for `pyo3::Py` pub type PyObject = pyo3::Py; From 75b9a5e564258eded5ca21b0075127ede4c616ee Mon Sep 17 00:00:00 2001 From: awolverp Date: Fri, 29 May 2026 13:10:43 +0330 Subject: [PATCH 35/60] Support pickle for LFUCache --- src/internal/pickle.rs | 10 ++++++ src/policies/lfupolicy.rs | 67 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+) diff --git a/src/internal/pickle.rs b/src/internal/pickle.rs index 2864bc1..07d8a85 100644 --- a/src/internal/pickle.rs +++ b/src/internal/pickle.rs @@ -1,11 +1,14 @@ use std::ptr; +use pyo3::IntoPyObject; + use crate::internal::alias; pub enum PyPickleVal<'a> { Owned(alias::PyObject), Borrowed(&'a alias::PyObject), Str(&'a str), + UnsignedBig(u128), Unsigned(usize), Signed(isize), Float(f64), @@ -19,6 +22,12 @@ impl From for PyPickleVal<'static> { PyPickleVal::Unsigned(v) } } +impl From for PyPickleVal<'static> { + #[inline] + fn from(v: u128) -> Self { + PyPickleVal::UnsignedBig(v) + } +} impl From for PyPickleVal<'static> { #[inline] fn from(v: isize) -> Self { @@ -87,6 +96,7 @@ impl<'a> PyPickleVal<'a> { pyo3::ffi::Py_INCREF(ptr); ptr } + Self::UnsignedBig(v) => v.into_pyobject(py)?.into_ptr(), Self::Unsigned(v) => pyo3::ffi::PyLong_FromSize_t(v), Self::Signed(v) => pyo3::ffi::PyLong_FromSsize_t(v), Self::Float(v) => pyo3::ffi::PyFloat_FromDouble(v), diff --git a/src/policies/lfupolicy.rs b/src/policies/lfupolicy.rs index a5ba950..2776ba8 100644 --- a/src/policies/lfupolicy.rs +++ b/src/policies/lfupolicy.rs @@ -1,6 +1,9 @@ +use pyo3::types::PyAnyMethods; + use crate::hashbrown; use crate::internal::alias; use crate::internal::lazyheap; +use crate::internal::pickle::Builder; use crate::internal::utils; use crate::policies::traits; use crate::policies::traits::HandleExt; @@ 
-472,4 +475,68 @@ impl PolicyExt for LFUPolicy { currsize: self.currsize, } } + + fn build_pickle( + &self, + tuple: &mut crate::internal::pickle::TupleBuilder< + '_, + crate::internal::pickle::PickleBuilder, + >, + ) -> pyo3::PyResult<()> { + let mut list = tuple.begin_list()?; + + for cursor in unsafe { self.table.iter() } { + let handle = unsafe { cursor.as_ref().element() }; + + let mut tuple = list.begin_tuple(3)?; + tuple.push(handle.key.as_ref())?; + tuple.push(handle.value())?; + tuple.push(handle.frequency.0)?; + tuple.end()?; + } + + list.end() + } + + fn from_pickle( + maxsize: usize, + getsizeof: Option, + _global_ttl: Option, + builded: pyo3::Bound<'_, pyo3::types::PyTuple>, + ) -> pyo3::PyResult<(Self::Shared, Self)> { + use pyo3::types::PyListMethods; + use pyo3::types::PyTupleMethods; + + let list = builded.get_item(0)?.cast_into::()?; + let list_length = list.len(); + + if list_length > maxsize { + return Err(new_py_error!( + PyValueError, + "list size is incompatible with maxsize" + )); + } + + let shared = Shared::new(maxsize, getsizeof); + let mut slf = Self::new(list.len()); + + for bound in list.iter() { + let (key, value, frequency) = + bound.extract::<(alias::PyObject, alias::PyObject, u128)>()?; + + let handle = + FrequencyHandle::new(bound.py(), shared.getsizeof(), key, value, frequency)?; + + slf.currsize = slf.currsize.saturating_add(handle.size()); + + let hash = handle.key().hash(); + let cursor = slf.heap.push(handle); + unsafe { + slf.table.insert_no_grow(hash, cursor); + } + } + + slf.heap.sort_by(compare_fn!()); + Ok((shared, slf)) + } } From 008577a149abdab7fda74402604c7661e8b1feb6 Mon Sep 17 00:00:00 2001 From: awolverp Date: Fri, 29 May 2026 13:19:33 +0330 Subject: [PATCH 36/60] Support pickle for LRUCache --- src/policies/fifopolicy.rs | 3 +- src/policies/lfupolicy.rs | 3 +- src/policies/lrupolicy.rs | 64 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+), 4 deletions(-) diff --git 
a/src/policies/fifopolicy.rs b/src/policies/fifopolicy.rs index 9622f30..67abb28 100644 --- a/src/policies/fifopolicy.rs +++ b/src/policies/fifopolicy.rs @@ -1,7 +1,5 @@ use std::collections::VecDeque; -use pyo3::types::PyAnyMethods; - use crate::hashbrown; use crate::internal::alias; use crate::internal::pickle::Builder; @@ -422,6 +420,7 @@ impl PolicyExt for FIFOPolicy { _global_ttl: Option, builded: pyo3::Bound<'_, pyo3::types::PyTuple>, ) -> pyo3::PyResult<(Self::Shared, Self)> { + use pyo3::types::PyAnyMethods; use pyo3::types::PyListMethods; use pyo3::types::PyTupleMethods; diff --git a/src/policies/lfupolicy.rs b/src/policies/lfupolicy.rs index 2776ba8..82010e8 100644 --- a/src/policies/lfupolicy.rs +++ b/src/policies/lfupolicy.rs @@ -1,5 +1,3 @@ -use pyo3::types::PyAnyMethods; - use crate::hashbrown; use crate::internal::alias; use crate::internal::lazyheap; @@ -504,6 +502,7 @@ impl PolicyExt for LFUPolicy { _global_ttl: Option, builded: pyo3::Bound<'_, pyo3::types::PyTuple>, ) -> pyo3::PyResult<(Self::Shared, Self)> { + use pyo3::types::PyAnyMethods; use pyo3::types::PyListMethods; use pyo3::types::PyTupleMethods; diff --git a/src/policies/lrupolicy.rs b/src/policies/lrupolicy.rs index 5b49be1..244cab9 100644 --- a/src/policies/lrupolicy.rs +++ b/src/policies/lrupolicy.rs @@ -1,5 +1,7 @@ use crate::hashbrown; +use crate::internal::alias; use crate::internal::linked_list; +use crate::internal::pickle::Builder; use crate::internal::utils; use crate::policies::traits; use crate::policies::traits::HandleExt; @@ -312,4 +314,66 @@ impl PolicyExt for LRUPolicy { currsize: self.currsize, } } + + fn build_pickle( + &self, + tuple: &mut crate::internal::pickle::TupleBuilder< + '_, + crate::internal::pickle::PickleBuilder, + >, + ) -> pyo3::PyResult<()> { + let mut list = tuple.begin_list()?; + + for cursor in unsafe { self.list.iter() } { + let handle = unsafe { cursor.element() }; + + let mut tuple = list.begin_tuple(2)?; + tuple.push(handle.key().as_ref())?; + 
tuple.push(handle.value())?; + tuple.end()?; + } + + list.end() + } + + fn from_pickle( + maxsize: usize, + getsizeof: Option, + _global_ttl: Option, + builded: pyo3::Bound<'_, pyo3::types::PyTuple>, + ) -> pyo3::PyResult<(Self::Shared, Self)> { + use pyo3::types::PyAnyMethods; + use pyo3::types::PyListMethods; + use pyo3::types::PyTupleMethods; + + let list = builded.get_item(0)?.cast_into::()?; + let list_length = list.len(); + + if list_length > maxsize { + return Err(new_py_error!( + PyValueError, + "list size is incompatible with maxsize" + )); + } + + let shared = Shared::new(maxsize, getsizeof); + let mut slf = Self::new(list.len()); + + for bound in list.iter() { + let (key, value) = bound.extract::<(alias::PyObject, alias::PyObject)>()?; + + let handle = Handle::new(bound.py(), shared.getsizeof(), key, value)?; + + slf.currsize = slf.currsize.saturating_add(handle.size()); + + let hash = handle.key().hash(); + let cursor = slf.list.push_back(handle); + + unsafe { + slf.table.insert_no_grow(hash, cursor); + } + } + + Ok((shared, slf)) + } } From e40732748ef1fe88c013a514fdace2b7769fdb7e Mon Sep 17 00:00:00 2001 From: awolverp Date: Fri, 29 May 2026 13:57:21 +0330 Subject: [PATCH 37/60] Support pickle for TTLCache/VTTLCache --- src/internal/onceinit.rs | 5 +++ src/internal/utils.rs | 7 ++++ src/policies/common.rs | 7 +--- src/policies/traits.rs | 8 +--- src/policies/ttlpolicy.rs | 80 ++++++++++++++++++++++++++++++++++++++ src/policies/vttlpolicy.rs | 76 ++++++++++++++++++++++++++++++++++++ src/policies/wrapped.rs | 14 ++++++- src/pyclasses/cache.rs | 8 ++++ src/pyclasses/fifocache.rs | 8 ++++ src/pyclasses/lfucache.rs | 8 ++++ src/pyclasses/lrucache.rs | 8 ++++ src/pyclasses/rrcache.rs | 8 ++++ src/pyclasses/ttlcache.rs | 15 +++++-- src/pyclasses/vttlcache.rs | 8 ++++ tests/mixins.py | 6 --- 15 files changed, 244 insertions(+), 22 deletions(-) diff --git a/src/internal/onceinit.rs b/src/internal/onceinit.rs index 0e79d44..a365c2b 100644 --- 
a/src/internal/onceinit.rs +++ b/src/internal/onceinit.rs @@ -63,6 +63,11 @@ impl OnceInit { .into() } + #[inline] + pub fn is_initialized(&self) -> bool { + self.0.state.load(atomic::Ordering::Acquire) == INIT + } + /// Initializes the container with `val`, transitioning state from `UNINIT` to `INIT`. /// /// Intended to be called from the PyO3 `__init__` handler once the Python-side diff --git a/src/internal/utils.rs b/src/internal/utils.rs index 77544d0..e239fdd 100644 --- a/src/internal/utils.rs +++ b/src/internal/utils.rs @@ -296,6 +296,13 @@ impl From for ExpiresAt { } } +impl From for ExpiresAt { + #[inline] + fn from(value: std::time::SystemTime) -> Self { + Self::Instant(value.into()) + } +} + impl From for std::time::SystemTime { #[inline] fn from(value: ExpiresAt) -> Self { diff --git a/src/policies/common.rs b/src/policies/common.rs index d30d6a5..9f657dc 100644 --- a/src/policies/common.rs +++ b/src/policies/common.rs @@ -114,15 +114,12 @@ impl Shared { /// Creates a new [`Shared`]. #[inline] pub fn new(maxsize: usize, getsizeof: Option) -> Self { - unsafe { Self::with_ttl(maxsize, getsizeof, None) } + Self::with_ttl(maxsize, getsizeof, None) } /// Creates a new [`Shared`] with configured TTL. - /// - /// # Safety - /// `ttl` should not be negative or zero. #[inline] - pub unsafe fn with_ttl( + pub fn with_ttl( maxsize: usize, getsizeof: Option, ttl: Option, diff --git a/src/policies/traits.rs b/src/policies/traits.rs index adef42b..99da9b2 100644 --- a/src/policies/traits.rs +++ b/src/policies/traits.rs @@ -144,9 +144,7 @@ pub trait PolicyExt: Sized { fn build_pickle( &self, tuple: &mut pickle::TupleBuilder<'_, pickle::PickleBuilder>, - ) -> pyo3::PyResult<()> { - todo!() - } + ) -> pyo3::PyResult<()>; /// Loads the builded pickle. 
fn from_pickle( @@ -154,7 +152,5 @@ pub trait PolicyExt: Sized { getsizeof: Option, global_ttl: Option, builded: pyo3::Bound<'_, pyo3::types::PyTuple>, - ) -> pyo3::PyResult<(Self::Shared, Self)> { - todo!() - } + ) -> pyo3::PyResult<(Self::Shared, Self)>; } diff --git a/src/policies/ttlpolicy.rs b/src/policies/ttlpolicy.rs index c0b22ed..fa03175 100644 --- a/src/policies/ttlpolicy.rs +++ b/src/policies/ttlpolicy.rs @@ -2,6 +2,7 @@ use std::collections::VecDeque; use crate::hashbrown; use crate::internal::alias; +use crate::internal::pickle::Builder; use crate::internal::utils; use crate::policies::traits; use crate::policies::traits::HandleExt; @@ -524,4 +525,83 @@ impl PolicyExt for TTLPolicy { front_offset: self.front_offset, } } + + fn build_pickle( + &self, + tuple: &mut crate::internal::pickle::TupleBuilder< + '_, + crate::internal::pickle::PickleBuilder, + >, + ) -> pyo3::PyResult<()> { + let mut list = tuple.begin_list()?; + + for handle in self.entries.iter() { + let mut tuple = list.begin_tuple(3)?; + tuple.push(handle.key().as_ref())?; + tuple.push(handle.value())?; + tuple.push( + handle + .expires_at + .duration_since(std::time::UNIX_EPOCH) + .unwrap(), + )?; + tuple.end()?; + } + + list.end() + } + + fn from_pickle( + maxsize: usize, + getsizeof: Option, + global_ttl: Option, + builded: pyo3::Bound<'_, pyo3::types::PyTuple>, + ) -> pyo3::PyResult<(Self::Shared, Self)> { + use pyo3::types::PyAnyMethods; + use pyo3::types::PyListMethods; + use pyo3::types::PyTupleMethods; + + if global_ttl.is_none_or(|x| x.is_zero()) { + return Err(new_py_error!(PyValueError, "global_ttl is zero")); + } + + let list = builded.get_item(0)?.cast_into::()?; + let list_length = list.len(); + + if list_length > maxsize { + return Err(new_py_error!( + PyValueError, + "list size is incompatible with maxsize" + )); + } + + let shared = Shared::with_ttl(maxsize, getsizeof, global_ttl); + let mut slf = Self::new(list.len()); + + for bound in list.iter() { + let (key, value, 
timestamp) = + bound.extract::<(alias::PyObject, alias::PyObject, f64)>()?; + + let handle = ExpiringHandle::new( + bound.py(), + shared.getsizeof(), + (std::time::UNIX_EPOCH + std::time::Duration::from_secs_f64(timestamp)).into(), + key, + value, + )?; + + slf.currsize = slf.currsize.saturating_add(handle.size()); + + unsafe { + slf.table.insert_no_grow( + handle.key().hash(), + // Adding `slf.front_offset` is unnecessary here + slf.entries.len(), + ); + } + slf.entries.push_back(handle); + } + + Ok((shared, slf)) + } } diff --git a/src/policies/vttlpolicy.rs b/src/policies/vttlpolicy.rs index 99ce385..e8e88c1 100644 --- a/src/policies/vttlpolicy.rs +++ b/src/policies/vttlpolicy.rs @@ -1,6 +1,7 @@ use crate::hashbrown; use crate::internal::alias; use crate::internal::lazyheap; +use crate::internal::pickle::Builder; use crate::internal::utils; use crate::policies::traits; use crate::policies::traits::HandleExt; @@ -475,4 +476,79 @@ impl PolicyExt for VTTLPolicy { currsize: self.currsize, } } + fn build_pickle( + &self, + tuple: &mut crate::internal::pickle::TupleBuilder< + '_, + crate::internal::pickle::PickleBuilder, + >, + ) -> pyo3::PyResult<()> { + let mut list = tuple.begin_list()?; + + for cursor in unsafe { self.table.iter() } { + let handle = unsafe { cursor.as_ref().element() }; + + let mut tuple = list.begin_tuple(3)?; + tuple.push(handle.key.as_ref())?; + tuple.push(handle.value())?; + tuple.push( + handle + .expires_at + .map(|x| x.duration_since(std::time::UNIX_EPOCH).unwrap()), + )?; + tuple.end()?; + } + + list.end() + } + + fn from_pickle( + maxsize: usize, + getsizeof: Option, + _global_ttl: Option, + builded: pyo3::Bound<'_, pyo3::types::PyTuple>, + ) -> pyo3::PyResult<(Self::Shared, Self)> { + use pyo3::types::PyAnyMethods; + use pyo3::types::PyListMethods; + use pyo3::types::PyTupleMethods; + + let list = builded.get_item(0)?.cast_into::()?; + let list_length = list.len(); + + if list_length > maxsize { + return Err(new_py_error!( + 
PyValueError, + "list size is incompatible with maxsize" + )); + } + + let shared = Shared::new(maxsize, getsizeof); + let mut slf = Self::new(list.len()); + + for bound in list.iter() { + let (key, value, timestamp) = + bound.extract::<(alias::PyObject, alias::PyObject, Option)>()?; + + let handle = ExpiringHandle::new( + bound.py(), + shared.getsizeof(), + timestamp + .map(|x| std::time::UNIX_EPOCH + std::time::Duration::from_secs_f64(x)) + .map(Into::into), + key, + value, + )?; + + slf.currsize = slf.currsize.saturating_add(handle.size()); + + let hash = handle.key().hash(); + let cursor = slf.heap.push(handle); + unsafe { + slf.table.insert_no_grow(hash, cursor); + } + } + + slf.heap.sort_by(compare_fn!()); + Ok((shared, slf)) + } } diff --git a/src/policies/wrapped.rs b/src/policies/wrapped.rs index 3616578..d729803 100644 --- a/src/policies/wrapped.rs +++ b/src/policies/wrapped.rs @@ -281,10 +281,20 @@ impl Wrapped

{ let maxsize: usize = tuple.get_item(0)?.extract()?; let getsizeof: Option = tuple.get_item(1)?.extract()?; - let global_ttl: Option = tuple.get_item(2)?.extract()?; + let global_ttl: Option = tuple.get_item(2)?.extract()?; + + if global_ttl.is_some_and(|x| x < 0.0) { + return Err(new_py_error!(PyValueError, "global_ttl is negative")); + } + let builded = tuple.get_item(3)?.cast_into::()?; - let (shared, inner) = P::from_pickle(maxsize, getsizeof, global_ttl, builded)?; + let (shared, inner) = P::from_pickle( + maxsize, + getsizeof, + global_ttl.map(|x| std::time::Duration::from_secs_f64(x)), + builded, + )?; Ok(Self { shared, diff --git a/src/pyclasses/cache.rs b/src/pyclasses/cache.rs index 945bc96..636db68 100644 --- a/src/pyclasses/cache.rs +++ b/src/pyclasses/cache.rs @@ -589,6 +589,10 @@ impl PyCache { } fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { + if self.0.is_initialized() { + return Ok(()); + } + let inner = self.0.get(); let policy = inner.policy(); @@ -602,6 +606,10 @@ impl PyCache { } fn __clear__(&self) { + if self.0.is_initialized() { + return; + } + let inner = self.0.get(); let mut policy = inner.policy(); policy.clear(inner.shared()); diff --git a/src/pyclasses/fifocache.rs b/src/pyclasses/fifocache.rs index 854448f..176928e 100644 --- a/src/pyclasses/fifocache.rs +++ b/src/pyclasses/fifocache.rs @@ -619,6 +619,10 @@ impl PyFIFOCache { } fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { + if self.0.is_initialized() { + return Ok(()); + } + let inner = self.0.get(); let policy = inner.policy(); @@ -630,6 +634,10 @@ impl PyFIFOCache { } fn __clear__(&self) { + if self.0.is_initialized() { + return; + } + let inner = self.0.get(); let mut policy = inner.policy(); policy.clear(inner.shared()); diff --git a/src/pyclasses/lfucache.rs b/src/pyclasses/lfucache.rs index 00cb001..20b3fee 100644 --- a/src/pyclasses/lfucache.rs +++ b/src/pyclasses/lfucache.rs @@ -683,6 +683,10 
@@ impl PyLFUCache { } fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { + if self.0.is_initialized() { + return Ok(()); + } + let inner = self.0.get(); let policy = inner.policy(); @@ -696,6 +700,10 @@ impl PyLFUCache { } fn __clear__(&self) { + if self.0.is_initialized() { + return; + } + let inner = self.0.get(); let mut policy = inner.policy(); policy.clear(inner.shared()); diff --git a/src/pyclasses/lrucache.rs b/src/pyclasses/lrucache.rs index 46471b1..3f6a3e1 100644 --- a/src/pyclasses/lrucache.rs +++ b/src/pyclasses/lrucache.rs @@ -665,6 +665,10 @@ impl PyLRUCache { } fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { + if self.0.is_initialized() { + return Ok(()); + } + let inner = self.0.get(); let policy = inner.policy(); @@ -678,6 +682,10 @@ impl PyLRUCache { } fn __clear__(&self) { + if self.0.is_initialized() { + return; + } + let inner = self.0.get(); let mut policy = inner.policy(); policy.clear(inner.shared()); diff --git a/src/pyclasses/rrcache.rs b/src/pyclasses/rrcache.rs index a7fe47d..511f8b2 100644 --- a/src/pyclasses/rrcache.rs +++ b/src/pyclasses/rrcache.rs @@ -611,6 +611,10 @@ impl PyRRCache { } fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { + if self.0.is_initialized() { + return Ok(()); + } + let inner = self.0.get(); let policy = inner.policy(); @@ -624,6 +628,10 @@ impl PyRRCache { } fn __clear__(&self) { + if self.0.is_initialized() { + return; + } + let inner = self.0.get(); let mut policy = inner.policy(); policy.clear(inner.shared()); diff --git a/src/pyclasses/ttlcache.rs b/src/pyclasses/ttlcache.rs index e51c44e..6a065e8 100644 --- a/src/pyclasses/ttlcache.rs +++ b/src/pyclasses/ttlcache.rs @@ -63,9 +63,10 @@ impl PyTTLCache { )); } - let wrapped = Wrapped::new(ttlpolicy::TTLPolicy::new(capacity), unsafe { - ttlpolicy::Shared::with_ttl(maxsize, getsizeof, Some(global_ttl.into())) - }); + let wrapped = Wrapped::new( 
+ ttlpolicy::TTLPolicy::new(capacity), + ttlpolicy::Shared::with_ttl(maxsize, getsizeof, Some(global_ttl.into())), + ); // Populate cache if `iterable` passed let extend_result = { @@ -755,6 +756,10 @@ impl PyTTLCache { } fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { + if self.0.is_initialized() { + return Ok(()); + } + let inner = self.0.get(); let policy = inner.policy(); @@ -766,6 +771,10 @@ impl PyTTLCache { } fn __clear__(&self) { + if self.0.is_initialized() { + return; + } + let inner = self.0.get(); let mut policy = inner.policy(); policy.clear(inner.shared()); diff --git a/src/pyclasses/vttlcache.rs b/src/pyclasses/vttlcache.rs index df01150..589b543 100644 --- a/src/pyclasses/vttlcache.rs +++ b/src/pyclasses/vttlcache.rs @@ -723,6 +723,10 @@ impl PyVTTLCache { } fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { + if self.0.is_initialized() { + return Ok(()); + } + let inner = self.0.get(); let policy = inner.policy(); @@ -736,6 +740,10 @@ impl PyVTTLCache { } fn __clear__(&self) { + if self.0.is_initialized() { + return; + } + let inner = self.0.get(); let mut policy = inner.policy(); policy.clear(inner.shared()); diff --git a/tests/mixins.py b/tests/mixins.py index 8996250..7dade7f 100644 --- a/tests/mixins.py +++ b/tests/mixins.py @@ -1073,9 +1073,6 @@ def test_fuzzy_chain_methods(self, key, value): def test_fuzzy_getstate_setstate(self, pairs): original = self.create_cache(20, pairs) - if not hasattr(original, "__setstate__"): - pytest.skip(f"{original.__class__} doesn't support __setstate__") - state = original.__getstate__() pickled = original.__class__.__new__(original.__class__) @@ -1088,9 +1085,6 @@ def test_fuzzy_getstate_setstate(self, pairs): def test_fuzzy_pickle_loads_dumps(self, pairs): original = self.create_cache(20, pairs) - if not hasattr(original, "__setstate__"): - pytest.skip(f"{original.__class__} doesn't support pickle") - state = pickle.dumps(original) 
pickled = pickle.loads(state) From 7bd159be259d16b08396b3dde2c04647776acd50 Mon Sep 17 00:00:00 2001 From: awolverp Date: Fri, 29 May 2026 13:58:42 +0330 Subject: [PATCH 38/60] Complete refactoring pickle & deepcopy --- src/lib.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 63f283b..d57776b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,9 +11,6 @@ pub mod internal; pub mod policies; pub mod pyclasses; -// TODO: refactor pickle -// TODO: refactor deepcopy - #[pyo3::pymodule] mod _core { #[allow(unused_imports)] From 201ba144c62432bd07c91a727bbe4bdcf537d8de Mon Sep 17 00:00:00 2001 From: awolverp Date: Fri, 29 May 2026 19:55:16 +0330 Subject: [PATCH 39/60] Add __version__ variable --- cachebox/__init__.py | 9 ++------- cachebox/_core.pyi | 11 +++++++---- src/lib.rs | 7 ++++++- tests/test_impls.py | 4 ++-- 4 files changed, 17 insertions(+), 14 deletions(-) diff --git a/cachebox/__init__.py b/cachebox/__init__.py index 46bb79f..76c2c1a 100644 --- a/cachebox/__init__.py +++ b/cachebox/__init__.py @@ -6,10 +6,5 @@ from ._cachebox import RRCache as RRCache from ._cachebox import TTLCache as TTLCache from ._cachebox import VTTLCache as VTTLCache - -try: - from ._core import ( - _fifocache_small_offset as _fifocache_small_offset, # type: ignore - ) -except ImportError: - pass +from ._core import __version__ as __version__ +from ._core import _small_offset_feature as _small_offset_feature diff --git a/cachebox/_core.pyi b/cachebox/_core.pyi index 375c41c..1bdcd66 100644 --- a/cachebox/_core.pyi +++ b/cachebox/_core.pyi @@ -3,6 +3,9 @@ from datetime import datetime, timedelta from _typeshed import SupportsItems +_small_offset_feature: typing.Final[bool] +__version__: typing.Final[str] + KT = typing.TypeVar("KT", bound=typing.Hashable) VT = typing.TypeVar("VT") DT = typing.TypeVar("DT") @@ -52,7 +55,7 @@ class BaseCacheImpl(typing.Generic[KT, VT]): getsizeof: A callable that computes the size of a key-value pair. 
When ``None``, each entry is assumed to have a size of 1 (equivalent to ``lambda k, v: 1``). Use this to implement - weighted caching — for example, sizing entries by memory + weighted caching - for example, sizing entries by memory footprint or byte length. Note: @@ -385,7 +388,7 @@ class FIFOCache(BaseCacheImpl[KT, VT]): Items are stored in insertion order. When capacity is reached, the item that has been present the longest is evicted. There is no concept of - "recently used" or "frequently used" — age alone determines eviction order. + "recently used" or "frequently used" - age alone determines eviction order. Conceptually it behaves like a queue: new items join the back and evictions come from the front. @@ -705,7 +708,7 @@ class LRUCache(BaseCacheImpl[KT, VT]): time is removed first, regardless of how many times it was accessed in the past. - Items are tracked by access recency — every read or write promotes an item + Items are tracked by access recency - every read or write promotes an item to "most recently used". When capacity is reached, the least recently used item (accessed longest ago) is evicted. @@ -894,7 +897,7 @@ class LFUCache(BaseCacheImpl[KT, VT]): A cache with a Least-Frequently-Used (LFU) eviction policy. When the cache is full, the item with the lowest access count is evicted - first. Ties in frequency are broken by recency — among equally rare items, + first. Ties in frequency are broken by recency - among equally rare items, the oldest is evicted. Access counts are tracked per key. 
This implementation uses a lazy binary diff --git a/src/lib.rs b/src/lib.rs index d57776b..e142c6f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -82,8 +82,13 @@ mod _core { pub fn init(m: &pyo3::Bound<'_, pyo3::types::PyModule>) -> pyo3::PyResult<()> { typeref::initialize_typeref(m.py()); + m.add("__version__", env!("CARGO_PKG_VERSION"))?; + #[cfg(feature = "small-offset")] - m.add("_fifocache_small_offset", true)?; + m.add("_small_offset_feature", true)?; + + #[cfg(not(feature = "small-offset"))] + m.add("_small_offset_feature", false)?; Ok(()) } diff --git a/tests/test_impls.py b/tests/test_impls.py index 31a899e..14c3a95 100644 --- a/tests/test_impls.py +++ b/tests/test_impls.py @@ -306,7 +306,7 @@ def test_clear_resets_fifo_order(self): assert cache.last() == 30 @pytest.mark.skipif( - not hasattr(cachebox, "_fifocache_small_offset"), + cachebox._small_offset_feature, reason="requires small-offset feature flag", ) def test_edge_case_of_front_offset_overflow(self): @@ -1160,7 +1160,7 @@ def test_clear_resets_fifo_order(self): assert cache.last() == 30 @pytest.mark.skipif( - not hasattr(cachebox, "_fifocache_small_offset"), + cachebox._small_offset_feature, reason="requires small-offset feature flag", ) def test_edge_case_of_front_offset_overflow(self): From f205e6b49e6402b2643779014e68e2c97ba88a76 Mon Sep 17 00:00:00 2001 From: awolverp Date: Sat, 30 May 2026 12:22:31 +0330 Subject: [PATCH 40/60] Optimize all `items`/`keys`/`values` methods --- cachebox/__init__.py | 3 +++ cachebox/_core.pyi | 2 +- src/pyclasses/cache.rs | 17 +++++++++-------- src/pyclasses/fifocache.rs | 16 ++++++++-------- src/pyclasses/lfucache.rs | 23 ++++++++++------------- src/pyclasses/lrucache.rs | 16 ++++++++-------- src/pyclasses/rrcache.rs | 16 ++++++++-------- src/pyclasses/ttlcache.rs | 23 ++++++++++------------- src/pyclasses/vttlcache.rs | 23 ++++++++++------------- 9 files changed, 67 insertions(+), 72 deletions(-) diff --git a/cachebox/__init__.py b/cachebox/__init__.py index 
76c2c1a..4fd9e00 100644 --- a/cachebox/__init__.py +++ b/cachebox/__init__.py @@ -8,3 +8,6 @@ from ._cachebox import VTTLCache as VTTLCache from ._core import __version__ as __version__ from ._core import _small_offset_feature as _small_offset_feature + +# utils +from .utils import Frozen as Frozen diff --git a/cachebox/_core.pyi b/cachebox/_core.pyi index 1bdcd66..31020f0 100644 --- a/cachebox/_core.pyi +++ b/cachebox/_core.pyi @@ -67,7 +67,7 @@ class BaseCacheImpl(typing.Generic[KT, VT]): @property def maxsize(self) -> int: - """int: The configured ``maxsize``.""" + """The configured ``maxsize``.""" ... @property diff --git a/src/pyclasses/cache.rs b/src/pyclasses/cache.rs index 636db68..e1e3c43 100644 --- a/src/pyclasses/cache.rs +++ b/src/pyclasses/cache.rs @@ -489,7 +489,7 @@ impl PyCache { .map(|x| !x) } - fn items(&self, py: pyo3::Python) -> pyo3::PyResult> { + fn items(&self) -> pyo3::PyResult> { let inner = self.0.get(); let gv = inner.shared().generation_version().clone(); let initial_gv = gv.get(); @@ -500,10 +500,11 @@ impl PyCache { gv, initial_gv, }; - pyo3::Py::new(py, result) + + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) } - fn values(&self, py: pyo3::Python) -> pyo3::PyResult> { + fn values(&self) -> pyo3::PyResult> { let inner = self.0.get(); let gv = inner.shared().generation_version().clone(); let initial_gv = gv.get(); @@ -514,10 +515,10 @@ impl PyCache { gv, initial_gv, }; - pyo3::Py::new(py, result) + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) } - fn keys(&self, py: pyo3::Python) -> pyo3::PyResult> { + fn keys(&self) -> pyo3::PyResult> { let inner = self.0.get(); let gv = inner.shared().generation_version().clone(); let initial_gv = gv.get(); @@ -528,12 +529,12 @@ impl PyCache { gv, initial_gv, }; - pyo3::Py::new(py, result) + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) } #[inline] - fn __iter__(&self, py: pyo3::Python) -> pyo3::PyResult> { - self.keys(py) + fn __iter__(&self) -> pyo3::PyResult> { + self.keys() } 
fn copy(&self, py: pyo3::Python) -> pyo3::PyResult> { diff --git a/src/pyclasses/fifocache.rs b/src/pyclasses/fifocache.rs index 176928e..7a586aa 100644 --- a/src/pyclasses/fifocache.rs +++ b/src/pyclasses/fifocache.rs @@ -494,7 +494,7 @@ impl PyFIFOCache { .map(|x| !x) } - fn items(&self, py: pyo3::Python) -> pyo3::PyResult> { + fn items(&self) -> pyo3::PyResult> { let inner = self.0.get(); let gv = inner.shared().generation_version().clone(); let initial_gv = gv.get(); @@ -505,10 +505,10 @@ impl PyFIFOCache { gv, initial_gv, }; - pyo3::Py::new(py, result) + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) } - fn values(&self, py: pyo3::Python) -> pyo3::PyResult> { + fn values(&self) -> pyo3::PyResult> { let inner = self.0.get(); let gv = inner.shared().generation_version().clone(); let initial_gv = gv.get(); @@ -519,10 +519,10 @@ impl PyFIFOCache { gv, initial_gv, }; - pyo3::Py::new(py, result) + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) } - fn keys(&self, py: pyo3::Python) -> pyo3::PyResult> { + fn keys(&self) -> pyo3::PyResult> { let inner = self.0.get(); let gv = inner.shared().generation_version().clone(); let initial_gv = gv.get(); @@ -533,12 +533,12 @@ impl PyFIFOCache { gv, initial_gv, }; - pyo3::Py::new(py, result) + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) } #[inline] - fn __iter__(&self, py: pyo3::Python) -> pyo3::PyResult> { - self.keys(py) + fn __iter__(&self) -> pyo3::PyResult> { + self.keys() } fn copy(&self, py: pyo3::Python) -> pyo3::PyResult> { diff --git a/src/pyclasses/lfucache.rs b/src/pyclasses/lfucache.rs index 20b3fee..fb7208b 100644 --- a/src/pyclasses/lfucache.rs +++ b/src/pyclasses/lfucache.rs @@ -512,7 +512,7 @@ impl PyLFUCache { .map(|x| !x) } - fn items(&self, py: pyo3::Python) -> pyo3::PyResult> { + fn items(&self) -> pyo3::PyResult> { let inner = self.0.get(); let mut policy = inner.policy(); @@ -524,10 +524,10 @@ impl PyLFUCache { gv: gv.clone(), initial_gv: gv.get(), }; - pyo3::Py::new(py, result) + 
pyo3::Python::attach(|py| pyo3::Py::new(py, result)) } - fn values(&self, py: pyo3::Python) -> pyo3::PyResult> { + fn values(&self) -> pyo3::PyResult> { let inner = self.0.get(); let mut policy = inner.policy(); @@ -539,10 +539,10 @@ impl PyLFUCache { gv: gv.clone(), initial_gv: gv.get(), }; - pyo3::Py::new(py, result) + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) } - fn keys(&self, py: pyo3::Python) -> pyo3::PyResult> { + fn keys(&self) -> pyo3::PyResult> { let inner = self.0.get(); let mut policy = inner.policy(); @@ -554,18 +554,15 @@ impl PyLFUCache { gv: gv.clone(), initial_gv: gv.get(), }; - pyo3::Py::new(py, result) + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) } #[inline] - fn __iter__(&self, py: pyo3::Python) -> pyo3::PyResult> { - self.keys(py) + fn __iter__(&self) -> pyo3::PyResult> { + self.keys() } - fn items_with_frequency( - &self, - py: pyo3::Python, - ) -> pyo3::PyResult> { + fn items_with_frequency(&self) -> pyo3::PyResult> { let inner = self.0.get(); let mut policy = inner.policy(); @@ -577,7 +574,7 @@ impl PyLFUCache { gv: gv.clone(), initial_gv: gv.get(), }; - pyo3::Py::new(py, result) + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) } fn copy(&self, py: pyo3::Python) -> pyo3::PyResult> { diff --git a/src/pyclasses/lrucache.rs b/src/pyclasses/lrucache.rs index 3f6a3e1..eba106a 100644 --- a/src/pyclasses/lrucache.rs +++ b/src/pyclasses/lrucache.rs @@ -521,7 +521,7 @@ impl PyLRUCache { .map(|x| !x) } - fn items(&self, py: pyo3::Python) -> pyo3::PyResult> { + fn items(&self) -> pyo3::PyResult> { let inner = self.0.get(); let gv = inner.shared().generation_version().clone(); let initial_gv = gv.get(); @@ -532,10 +532,10 @@ impl PyLRUCache { gv, initial_gv, }; - pyo3::Py::new(py, result) + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) } - fn values(&self, py: pyo3::Python) -> pyo3::PyResult> { + fn values(&self) -> pyo3::PyResult> { let inner = self.0.get(); let gv = inner.shared().generation_version().clone(); let 
initial_gv = gv.get(); @@ -546,10 +546,10 @@ impl PyLRUCache { gv, initial_gv, }; - pyo3::Py::new(py, result) + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) } - fn keys(&self, py: pyo3::Python) -> pyo3::PyResult> { + fn keys(&self) -> pyo3::PyResult> { let inner = self.0.get(); let gv = inner.shared().generation_version().clone(); let initial_gv = gv.get(); @@ -560,12 +560,12 @@ impl PyLRUCache { gv, initial_gv, }; - pyo3::Py::new(py, result) + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) } #[inline] - fn __iter__(&self, py: pyo3::Python) -> pyo3::PyResult> { - self.keys(py) + fn __iter__(&self) -> pyo3::PyResult> { + self.keys() } fn copy(&self, py: pyo3::Python) -> pyo3::PyResult> { diff --git a/src/pyclasses/rrcache.rs b/src/pyclasses/rrcache.rs index 511f8b2..1b33bba 100644 --- a/src/pyclasses/rrcache.rs +++ b/src/pyclasses/rrcache.rs @@ -494,7 +494,7 @@ impl PyRRCache { .map(|x| !x) } - fn items(&self, py: pyo3::Python) -> pyo3::PyResult> { + fn items(&self) -> pyo3::PyResult> { let inner = self.0.get(); let gv = inner.shared().generation_version().clone(); let initial_gv = gv.get(); @@ -505,10 +505,10 @@ impl PyRRCache { gv, initial_gv, }; - pyo3::Py::new(py, result) + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) } - fn values(&self, py: pyo3::Python) -> pyo3::PyResult> { + fn values(&self) -> pyo3::PyResult> { let inner = self.0.get(); let gv = inner.shared().generation_version().clone(); let initial_gv = gv.get(); @@ -519,10 +519,10 @@ impl PyRRCache { gv, initial_gv, }; - pyo3::Py::new(py, result) + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) } - fn keys(&self, py: pyo3::Python) -> pyo3::PyResult> { + fn keys(&self) -> pyo3::PyResult> { let inner = self.0.get(); let gv = inner.shared().generation_version().clone(); let initial_gv = gv.get(); @@ -533,12 +533,12 @@ impl PyRRCache { gv, initial_gv, }; - pyo3::Py::new(py, result) + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) } #[inline] - fn __iter__(&self, py: 
pyo3::Python) -> pyo3::PyResult> { - self.keys(py) + fn __iter__(&self) -> pyo3::PyResult> { + self.keys() } fn copy(&self, py: pyo3::Python) -> pyo3::PyResult> { diff --git a/src/pyclasses/ttlcache.rs b/src/pyclasses/ttlcache.rs index 6a065e8..e8d949c 100644 --- a/src/pyclasses/ttlcache.rs +++ b/src/pyclasses/ttlcache.rs @@ -499,7 +499,7 @@ impl PyTTLCache { .map(|x| !x) } - fn items(&self, py: pyo3::Python) -> pyo3::PyResult> { + fn items(&self) -> pyo3::PyResult> { let inner = self.0.get(); let iter = inner.policy().iter(inner.shared()); @@ -513,10 +513,10 @@ impl PyTTLCache { gv, initial_gv, }; - pyo3::Py::new(py, result) + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) } - fn values(&self, py: pyo3::Python) -> pyo3::PyResult> { + fn values(&self) -> pyo3::PyResult> { let inner = self.0.get(); let iter = inner.policy().iter(inner.shared()); @@ -530,10 +530,10 @@ impl PyTTLCache { gv, initial_gv, }; - pyo3::Py::new(py, result) + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) } - fn keys(&self, py: pyo3::Python) -> pyo3::PyResult> { + fn keys(&self) -> pyo3::PyResult> { let inner = self.0.get(); let iter = inner.policy().iter(inner.shared()); @@ -547,12 +547,12 @@ impl PyTTLCache { gv, initial_gv, }; - pyo3::Py::new(py, result) + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) } #[inline] - fn __iter__(&self, py: pyo3::Python) -> pyo3::PyResult> { - self.keys(py) + fn __iter__(&self) -> pyo3::PyResult> { + self.keys() } fn copy(&self, py: pyo3::Python) -> pyo3::PyResult> { @@ -735,10 +735,7 @@ impl PyTTLCache { Ok((key.into(), val, dur.as_secs_f64())) } - fn items_with_expire( - &self, - py: pyo3::Python, - ) -> pyo3::PyResult> { + fn items_with_expire(&self) -> pyo3::PyResult> { let inner = self.0.get(); let iter = inner.policy().iter(inner.shared()); @@ -752,7 +749,7 @@ impl PyTTLCache { gv, initial_gv, }; - pyo3::Py::new(py, result) + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) } fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> 
Result<(), pyo3::PyTraverseError> { diff --git a/src/pyclasses/vttlcache.rs b/src/pyclasses/vttlcache.rs index 589b543..84baf30 100644 --- a/src/pyclasses/vttlcache.rs +++ b/src/pyclasses/vttlcache.rs @@ -480,7 +480,7 @@ impl PyVTTLCache { .map(|x| !x) } - fn items(&self, py: pyo3::Python) -> pyo3::PyResult> { + fn items(&self) -> pyo3::PyResult> { let inner = self.0.get(); let mut policy = inner.policy(); @@ -492,10 +492,10 @@ impl PyVTTLCache { gv: gv.clone(), initial_gv: gv.get(), }; - pyo3::Py::new(py, result) + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) } - fn values(&self, py: pyo3::Python) -> pyo3::PyResult> { + fn values(&self) -> pyo3::PyResult> { let inner = self.0.get(); let mut policy = inner.policy(); @@ -507,10 +507,10 @@ impl PyVTTLCache { gv: gv.clone(), initial_gv: gv.get(), }; - pyo3::Py::new(py, result) + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) } - fn keys(&self, py: pyo3::Python) -> pyo3::PyResult> { + fn keys(&self) -> pyo3::PyResult> { let inner = self.0.get(); let mut policy = inner.policy(); @@ -522,12 +522,12 @@ impl PyVTTLCache { gv: gv.clone(), initial_gv: gv.get(), }; - pyo3::Py::new(py, result) + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) } #[inline] - fn __iter__(&self, py: pyo3::Python) -> pyo3::PyResult> { - self.keys(py) + fn __iter__(&self) -> pyo3::PyResult> { + self.keys() } fn copy(&self, py: pyo3::Python) -> pyo3::PyResult> { @@ -704,10 +704,7 @@ impl PyVTTLCache { Ok((key.into(), val, dur)) } - fn items_with_expire( - &self, - py: pyo3::Python, - ) -> pyo3::PyResult> { + fn items_with_expire(&self) -> pyo3::PyResult> { let inner = self.0.get(); let mut policy = inner.policy(); @@ -719,7 +716,7 @@ impl PyVTTLCache { gv: gv.clone(), initial_gv: gv.get(), }; - pyo3::Py::new(py, result) + pyo3::Python::attach(|py| pyo3::Py::new(py, result)) } fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { From 9747a145af9e89579d54d4d9cd67e6816a9c145f Mon Sep 17 00:00:00 2001 
From: awolverp Date: Sat, 30 May 2026 12:43:22 +0330 Subject: [PATCH 41/60] Refactor & optimize `utils` - Replaced `copy_level` parameter with more flexible `postprocess` callable (defaults to `postprocess_copy_mutables`). Now supports None (no copy), shallow copy, deep copy, or custom post-processing. - Removed deprecated `cachedmethod` decorator (use cached(..., cache=lambda self: self._cache) instead). - `CacheInfo` namedtuple no longer includes the `memory` field. - Default cache changed from `FIFOCache` to `LRUCache`. - Fixed incorrect `is_method` handling. - Improved per-key lock management: now uses a plain `dict` instead of `defaultdict`, and removes the lock when no waiters remain (better memory behavior under high contention). - Improved key makers (`make_key`, `make_hash_key`, `make_typed_key`) --- cachebox/__init__.py | 15 +- cachebox/utils.py | 646 +++++++++++++++++++++++++++++++++++++++++++ tests/test_utils.py | 1 + 3 files changed, 661 insertions(+), 1 deletion(-) create mode 100644 cachebox/utils.py create mode 100644 tests/test_utils.py diff --git a/cachebox/__init__.py b/cachebox/__init__.py index 4fd9e00..906a5e1 100644 --- a/cachebox/__init__.py +++ b/cachebox/__init__.py @@ -9,5 +9,18 @@ from ._core import __version__ as __version__ from ._core import _small_offset_feature as _small_offset_feature -# utils +# Utils from .utils import Frozen as Frozen +from .utils import cached as cached +from .utils import is_cached as is_cached + +# Key maker functions +from .utils import make_hash_key as make_hash_key +from .utils import make_key as make_key +from .utils import make_typed_key as make_typed_key + +# Postprocess functions +from .utils import postprocess_copy as postprocess_copy +from .utils import postprocess_copy_mutables as postprocess_copy_mutables +from .utils import postprocess_deepcopy as postprocess_deepcopy +from .utils import postprocess_deepcopy_mutables as postprocess_deepcopy_mutables diff --git a/cachebox/utils.py 
b/cachebox/utils.py new file mode 100644 index 0000000..f5a131f --- /dev/null +++ b/cachebox/utils.py @@ -0,0 +1,646 @@ +import _thread +import asyncio +import functools +import inspect +import typing +from collections import namedtuple +from copy import copy as _shallow_copy +from copy import deepcopy as _deep_copy + +from ._cachebox import BaseCacheImpl, LRUCache + +if typing.TYPE_CHECKING: + from ._core import _IterableType + +KT = typing.TypeVar("KT") +VT = typing.TypeVar("VT") +DT = typing.TypeVar("DT") +FT = typing.TypeVar("FT", bound=typing.Callable[..., typing.Any]) + +_PostProcess: typing.TypeAlias = typing.Callable[[typing.Any], typing.Any] | None + + +_COPY_TYPES = frozenset((dict, list, set)) + + +def postprocess_copy_mutables(value: VT) -> VT: + """ + Shallow-copy *value* before returning it (only `dict`, `list`, and `set`) + """ + if type(value) in _COPY_TYPES: + return _shallow_copy(value) + + return value + + +def postprocess_copy(value: VT) -> VT: + """Shallow-copy *value* before returning it""" + return _shallow_copy(value) + + +def postprocess_deepcopy_mutables(value: VT) -> VT: + """ + Deep-copy *value* before returning it (only `dict`, `list`, and `set`) + """ + if type(value) in _COPY_TYPES: + return _deep_copy(value) + + return value + + +def postprocess_deepcopy(value: VT) -> VT: + """Deep-copy *value* before returning it""" + return _deep_copy(value) + + +_KWDS_MARK = object() +_FAST_TYPES = frozenset((int, str)) + + +def make_key(*args, **kwds) -> typing.Hashable: + """ + Default cache key. + + Fast-path: a single ``int`` or ``str`` argument is returned as-is. + Otherwise a plain tuple (plus a kwargs sentinel when needed) is returned. 
+ """ + if not kwds: + if len(args) == 1 and type(args[0]) in _FAST_TYPES: + return args[0] + return args + + key = args + (_KWDS_MARK,) + for item in kwds.items(): + key += item + return key[0] if len(key) == 1 and type(key[0]) in _FAST_TYPES else key + + +def make_hash_key(*args, **kwds) -> int: + """ + Key as the hash of all positional and keyword arguments. + + Avoids storing the raw argument tuple, at the cost of potential hash + collisions mapping distinct inputs to the same cache slot. + """ + if not kwds: + return hash(args) + key = args + (_KWDS_MARK,) + for item in kwds.items(): + key += item + return hash(key) + + +def make_typed_key(*args, **kwds) -> tuple: + """ + Key that includes the runtime type of every argument. + + Ensures ``f(1)`` and ``f(1.0)`` are cached separately even though + ``1 == 1.0``. + """ + key: tuple = args + if kwds: + key += (_KWDS_MARK,) + for item in kwds.items(): + key += item + + key += tuple(type(v) for v in args) + if kwds: + key += tuple(type(v) for v in kwds.values()) + + return key + + +class Frozen(BaseCacheImpl[KT, VT]): # pragma: no cover + """ + A wrapper class that prevents modifications to an underlying cache implementation. + + This class provides a read-only view of a cache, optionally allowing silent + suppression of modification attempts instead of raising exceptions. + """ + + __slots__ = ("__cache", "ignore") + + def __init__(self, cls: BaseCacheImpl[KT, VT], ignore: bool = False) -> None: + """ + Initialize a frozen cache wrapper. + + Args: + cls: The underlying cache implementation to be frozen. + ignore: If ``True``, silently ignores modification attempts; if ``False``, raises + ``TypeError`` when modification is attempted. Default is ``False``. 
+ """ + assert isinstance(cls, BaseCacheImpl) + assert type(cls) is not Frozen + + self.__cache = cls + self.ignore = ignore + + def _guard(self) -> None: + if not self.ignore: + raise TypeError("This cache is frozen.") + + @property + def cache(self) -> BaseCacheImpl[KT, VT]: + """Returns the wrapped cache implementation.""" + return self.__cache + + @property + def maxsize(self) -> int: + """The configured ``maxsize``.""" + return self.__cache.maxsize + + @property + def getsizeof(self) -> typing.Callable[[KT, VT], int] | None: + """Callable or None: The configured ``getsizeof`` function.""" + return self.__cache.getsizeof + + def current_size(self) -> int: + """ + Returns the current total cumulative size of all stored entries. + + Returns: + The sum of sizes of all entries currently in the cache. + """ + return self.__cache.current_size() + + def remaining_size(self) -> int: + """ + Returns the remaining available size. + + Returns: + The result of ``maxsize - current_size``. + """ + return self.__cache.remaining_size() + + def capacity(self) -> int: + """ + Returns the number of elements the map can hold without reallocating. + + Returns: + The current allocated capacity. + """ + return self.__cache.capacity() + + def __len__(self) -> int: + """ + Returns the number of entries currently in the cache. + + Returns: + The number of entries in the cache. + """ + return len(self.__cache) + + def __sizeof__(self) -> int: + return self.__cache.__sizeof__() + + def __bool__(self) -> bool: + return bool(self.__cache) + + def __contains__(self, key: KT) -> bool: + return self.__cache.contains(key) + + def contains(self, key: KT) -> bool: + """ + Returns ``True`` if the cache contains an entry for ``key``. + + Equivalent to ``key in self``. Prefer this method over ``key in self`` + to keep code compatible across different cache policies. + + Args: + key: The key to look up. + + Returns: + ``True`` if the key exists in the cache, ``False`` otherwise. 
+ """ + return self.__cache.contains(key) + + def is_empty(self) -> bool: + """ + Returns ``True`` if the cache is empty. + + Returns: + ``True`` if the cache contains no entries. + """ + return self.__cache.is_empty() + + def is_full(self) -> bool: + """ + Returns ``True`` when the cumulative size has reached the maxsize limit. + + Returns: + ``True`` if the cache is at capacity. + """ + return self.__cache.is_full() + + def insert( + self, + key: KT, + value: VT, + *args: typing.Any, + **kwargs: typing.Any, + ) -> typing.Optional[VT]: + return self._guard() + + def __setitem__(self, key: KT, value: VT) -> None: + return self._guard() + + def update( + self, + iterable: _IterableType[KT, VT], + *args: typing.Any, + **kwargs: typing.Any, + ) -> None: + return self._guard() + + def get(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: + return self.__cache.get(key, default) + + def __getitem__(self, key: KT) -> VT: + return self.__cache[key] + + def setdefault( + self, + key: KT, + default: typing.Optional[DT] = None, + *args: typing.Any, + **kwargs: typing.Any, + ) -> typing.Optional[VT | DT]: + return self._guard() + + def pop(self, key: KT, default: DT = None) -> typing.Union[VT, DT]: + """ + Removes the specified key and returns the corresponding value. + + Args: + key: The key to remove. + default: Value to return if the key is not found. + + Returns: + The value associated with ``key``, or ``default`` if not found. + + Raises: + KeyError: If the key is not found and no ``default`` is provided. + """ + return self._guard() # type: ignore[return-value] + + def __delitem__(self, key: KT) -> None: + return self._guard() + + def popitem(self) -> typing.Tuple[KT, VT]: + return self._guard() # type: ignore[return-value] + + def drain(self, n: int) -> int: + """ + Calls ``popitem()`` ``n`` times and returns the count of removed items. + + Args: + n: The number of items to remove. + + Returns: + The number of items successfully removed. 
+ """ + return self._guard() # type: ignore[return-value] + + def shrink_to_fit(self) -> None: + """Shrinks the internal allocation as close to the current length as possible.""" + return self._guard() + + def clear(self, *, reuse: bool = False) -> None: + """ + Removes all items from the cache. + + Args: + reuse: If ``True``, retains the allocated memory for future reuse + rather than freeing it. Defaults to ``False``. + """ + return self._guard() + + def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: + return self.__cache.items() + + def values(self) -> typing.Iterable[VT]: + return self.__cache.values() + + def keys(self) -> typing.Iterable[KT]: + return self.__cache.keys() + + def __iter__(self) -> typing.Iterator[KT]: + return iter(self.__cache) + + def copy(self) -> "Frozen[KT, VT]": + return Frozen(self.__cache.copy(), ignore=self.ignore) + + def __copy__(self) -> "Frozen[KT, VT]": + return Frozen(self.__cache.copy(), ignore=self.ignore) + + def __repr__(self) -> str: + return "Frozen(%s)" % repr(self.__cache) + + +class _Lock: + __slots__ = ("_lock", "waiters") + + def __init__(self) -> None: + self._lock = _thread.allocate_lock() + self.waiters = 0 + + def __enter__(self) -> None: + self.waiters += 1 + self._lock.acquire() + + def __exit__(self, *_) -> None: + self.waiters -= 1 + self._lock.release() + + +class _AsyncLock: + __slots__ = ("_lock", "waiters") + + def __init__(self) -> None: + self._lock = asyncio.Lock() + self.waiters = 0 + + async def __aenter__(self) -> None: + self.waiters += 1 + await self._lock.acquire() + + async def __aexit__(self, *_) -> None: + self.waiters -= 1 + self._lock.release() + + +CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "length"]) +EVENT_MISS = 1 +EVENT_HIT = 2 + + +def _cached_wrapper( + func, + cache: BaseCacheImpl | typing.Callable, + key_maker: typing.Callable[[tuple, dict], typing.Hashable], + clear_reuse: bool, + callback: typing.Callable[[int, typing.Any, typing.Any], None] | None, + 
postprocess: _PostProcess, +): + cache_is_fn = callable(cache) + + # Per-instance caches receive `self` as args[0]; exclude it from the key + _make_key = ( + (lambda a, k: key_maker(*a[1:], **k)) + if cache_is_fn + else (lambda a, k: key_maker(*a, **k)) + ) + + hits = misses = 0 + locks: dict[typing.Hashable, _Lock] = {} + pending_errors: dict[typing.Hashable, BaseException] = {} + + def _wrapped(*args, **kwds): + nonlocal hits, misses + + # Passing `cachebox__ignore=True` bypasses the cache and + # calls the function directly. + if kwds.pop("cachebox__ignore", False): + return func(*args, **kwds) + + _cache: BaseCacheImpl = cache(args[0]) if cache_is_fn else cache # type: ignore[arg-type] + key = _make_key(args, kwds) + + # Most calls are expected to hit the cache; avoid acquiring a lock. + # Implementations are thread-safe. + try: + result = _cache[key] + hits += 1 + if callback is not None: + callback(EVENT_HIT, key, result) + + return postprocess(result) if postprocess is not None else result + except KeyError: + pass + + lock = locks.get(key) + if lock is None: + locks[key] = lock = _Lock() + + # Acquire the per-key lock so that only one task computes the value + # while the rest wait. + with lock: + # Re-raise any exception stored by a previous owner so that all + # waiters fail with the same error. + err = pending_errors.get(key) + if err is not None: + if lock.waiters == 0: + del pending_errors[key] + raise err + + # Re-check the cache; a previous waiter may have already populated + # it while we were waiting for the lock.
+ try: + result = _cache[key] + hits += 1 + event = EVENT_HIT + except KeyError: + try: + result = func(*args, **kwds) + except Exception as exc: + if lock.waiters > 0: + pending_errors[key] = exc + raise + else: + _cache[key] = result + misses += 1 + event = EVENT_MISS + + if lock.waiters == 0: + locks.pop(key, None) + + if callback is not None: + callback(event, key, result) + + return postprocess(result) if postprocess is not None else result + + if not cache_is_fn: + _wrapped.cache = cache # type: ignore[attr-defined] + _wrapped.cache_info = lambda: CacheInfo(hits, misses, cache.maxsize, len(cache)) # type: ignore[attr-defined] + + def cache_clear() -> None: + nonlocal hits, misses + cache.clear(reuse=clear_reuse) # type: ignore[union-attr] + hits = misses = 0 + locks.clear() + pending_errors.clear() + + _wrapped.cache_clear = cache_clear # type: ignore[attr-defined] + + _wrapped.callback = callback # type: ignore[attr-defined] + return _wrapped + + +def _async_cached_wrapper( + func, + cache: BaseCacheImpl | typing.Callable, + key_maker: typing.Callable[..., typing.Hashable], + clear_reuse: bool, + callback: typing.Callable | None, + postprocess: _PostProcess, +): + cache_is_fn = callable(cache) + _make_key = ( + (lambda a, k: key_maker(*a[1:], **k)) + if cache_is_fn + else (lambda a, k: key_maker(*a, **k)) + ) + + hits = misses = 0 + locks: dict[typing.Hashable, _AsyncLock] = {} + pending_errors: dict[typing.Hashable, BaseException] = {} + + async def _wrapped(*args, **kwds): + nonlocal hits, misses + + # Passing `cachebox__ignore=True` bypasses the cache and + # calls the function directly. + if kwds.pop("cachebox__ignore", False): + return await func(*args, **kwds) + + _cache: BaseCacheImpl = cache(args[0]) if cache_is_fn else cache # type: ignore[arg-type] + key = _make_key(args, kwds) + + # Hot path - no lock needed. 
+ try: + result = _cache[key] + hits += 1 + if callback is not None: + ret = callback(EVENT_HIT, key, result) + if inspect.isawaitable(ret): + await ret + return postprocess(result) if postprocess is not None else result + except KeyError: + pass + + lock = locks.get(key) + if lock is None: + locks[key] = lock = _AsyncLock() + + async with lock: + err = pending_errors.get(key) + if err is not None: + if lock.waiters == 0: + del pending_errors[key] + + raise err + + try: + result = _cache[key] + hits += 1 + event = EVENT_HIT + except KeyError: + try: + result = await func(*args, **kwds) + except Exception as exc: + if lock.waiters > 0: + pending_errors[key] = exc + raise + else: + _cache[key] = result + misses += 1 + event = EVENT_MISS + + if lock.waiters == 0: + locks.pop(key, None) + + if callback is not None: + ret = callback(event, key, result) + if inspect.isawaitable(ret): + await ret + + return postprocess(result) if postprocess is not None else result + + if not cache_is_fn: + _wrapped.cache = cache # type: ignore[attr-defined] + _wrapped.cache_info = lambda: CacheInfo(hits, misses, cache.maxsize, len(cache)) # type: ignore[attr-defined] + + def cache_clear() -> None: + nonlocal hits, misses + cache.clear(reuse=clear_reuse) # type: ignore[union-attr] + hits = misses = 0 + locks.clear() + pending_errors.clear() + + _wrapped.cache_clear = cache_clear # type: ignore[attr-defined] + + _wrapped.callback = callback # type: ignore[attr-defined] + return _wrapped + + +def cached( + cache: BaseCacheImpl | dict | typing.Callable[..., BaseCacheImpl] | None = None, + key_maker: typing.Callable[..., typing.Hashable] = make_key, + clear_reuse: bool = False, + callback: typing.Callable[[int, typing.Any, typing.Any], typing.Any] | None = None, + postprocess: _PostProcess = postprocess_copy_mutables, +) -> typing.Callable[[FT], FT]: + """ + Decorator to memoize function/method results. 
+ + Args: + cache: Cache instance, ``dict``, or callable ``(self) -> cache`` for + per-instance caches. ``None`` defaults to an unbounded + :class:`LRUCache`. + key_maker: Converts ``(args, kwds)`` to a hashable key. Built-ins: + :func:`make_key` (default), :func:`make_hash_key`, + :func:`make_typed_key`. + clear_reuse: Pass ``reuse=True`` to ``cache.clear()`` when + :func:`cache_clear` is called. + callback: Called as ``callback(event, key, value)`` on every hit/miss. + May be a coroutine in async contexts. + postprocess: Optional ``(value) -> value`` transform applied before + returning a result to the caller. Ready-to-use options: + + * ``None`` - return the cached object as-is. + * :func:`postprocess_copy` - shallow-copy. + * :func:`postprocess_copy_mutables` - shallow-copy only `dict`, `list` and `set` (default). + * :func:`postprocess_deepcopy` - deep-copy. + * :func:`postprocess_deepcopy_mutables` - deep-copy only `dict`, `list` and `set`. + + Pass ``cachebox__ignore=True`` at call-time to bypass the cache. 
+ + Examples:: + + @cachebox.cached(cachebox.LRUCache(128)) + def add(a, b): + return a + b + + # Per-instance method cache + class Foo: + def __init__(self): + self._cache = cachebox.LRUCache(0) + + @cachebox.cached(lambda self: self._cache) + def compute(self, n): + return n * 2 + """ + if cache is None: + cache = LRUCache(0) + elif type(cache) is dict: + cache = LRUCache(0, cache) # type: ignore[arg-type] + + cache_is_fn = callable(cache) + if not isinstance(cache, BaseCacheImpl) and not cache_is_fn: + raise TypeError("expected a cachebox cache or a callable, got %r" % (cache,)) + + def decorator(func: FT) -> FT: + builder = ( + _async_cached_wrapper + if inspect.iscoroutinefunction(func) + else _cached_wrapper + ) + wrapper = builder(func, cache, key_maker, clear_reuse, callback, postprocess) # type: ignore[arg-type] + return functools.update_wrapper(wrapper, func) # type: ignore[return-value] + + return decorator + + +def is_cached(func: object) -> bool: + """Return ``True`` if *func* was decorated with :func:`cached`.""" + return hasattr(func, "cache") and isinstance(func.cache, BaseCacheImpl) # type: ignore[union-attr] diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..4640904 --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1 @@ +# TODO From 91a8ded41c12ed6b8b02355ca3dd1836f934d4d2 Mon Sep 17 00:00:00 2001 From: awolverp Date: Sat, 30 May 2026 16:21:06 +0330 Subject: [PATCH 42/60] Fix & test utils --- cachebox/__init__.py | 12 +- cachebox/utils.py | 82 ++++++++- requirements-dev.txt | 2 +- tests/test_utils.py | 407 ++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 487 insertions(+), 16 deletions(-) diff --git a/cachebox/__init__.py b/cachebox/__init__.py index 906a5e1..ca3b6d9 100644 --- a/cachebox/__init__.py +++ b/cachebox/__init__.py @@ -8,18 +8,18 @@ from ._cachebox import VTTLCache as VTTLCache from ._core import __version__ as __version__ from ._core import _small_offset_feature as 
_small_offset_feature - -# Utils +from .utils import EVENT_HIT as EVENT_HIT +from .utils import EVENT_MISS as EVENT_MISS from .utils import Frozen as Frozen from .utils import cached as cached +from .utils import clear_cached_cache as clear_cached_cache +from .utils import get_cached_cache as get_cached_cache +from .utils import get_cached_cache_info as get_cached_cache_info +from .utils import get_cached_callback as get_cached_callback from .utils import is_cached as is_cached - -# Key maker functions from .utils import make_hash_key as make_hash_key from .utils import make_key as make_key from .utils import make_typed_key as make_typed_key - -# Postprocess functions from .utils import postprocess_copy as postprocess_copy from .utils import postprocess_copy_mutables as postprocess_copy_mutables from .utils import postprocess_deepcopy as postprocess_deepcopy diff --git a/cachebox/utils.py b/cachebox/utils.py index f5a131f..0e513f1 100644 --- a/cachebox/utils.py +++ b/cachebox/utils.py @@ -17,7 +17,8 @@ DT = typing.TypeVar("DT") FT = typing.TypeVar("FT", bound=typing.Callable[..., typing.Any]) -_PostProcess: typing.TypeAlias = typing.Callable[[typing.Any], typing.Any] | None +_PostProcess: typing.TypeAlias = typing.Callable[[typing.Any], typing.Any] +_Callback: typing.TypeAlias = typing.Callable[[int, typing.Any, typing.Any], typing.Any] _COPY_TYPES = frozenset((dict, list, set)) @@ -380,7 +381,7 @@ def _cached_wrapper( key_maker: typing.Callable[[tuple, dict], typing.Hashable], clear_reuse: bool, callback: typing.Callable[[int, typing.Any, typing.Any], None] | None, - postprocess: _PostProcess, + postprocess: _PostProcess | None, ): cache_is_fn = callable(cache) @@ -481,8 +482,8 @@ def _async_cached_wrapper( cache: BaseCacheImpl | typing.Callable, key_maker: typing.Callable[..., typing.Hashable], clear_reuse: bool, - callback: typing.Callable | None, - postprocess: _PostProcess, + callback: _Callback | None, + postprocess: _PostProcess | None, ): cache_is_fn = 
callable(cache) _make_key = ( @@ -577,8 +578,8 @@ def cached( cache: BaseCacheImpl | dict | typing.Callable[..., BaseCacheImpl] | None = None, key_maker: typing.Callable[..., typing.Hashable] = make_key, clear_reuse: bool = False, - callback: typing.Callable[[int, typing.Any, typing.Any], typing.Any] | None = None, - postprocess: _PostProcess = postprocess_copy_mutables, + callback: _Callback | None = None, + postprocess: _PostProcess | None = postprocess_copy_mutables, ) -> typing.Callable[[FT], FT]: """ Decorator to memoize function/method results. @@ -603,7 +604,11 @@ def cached( * :func:`postprocess_deepcopy` - deep-copy. * :func:`postprocess_deepcopy_mutables` - deep-copy only `dict`, `list` and `set`. - Pass ``cachebox__ignore=True`` at call-time to bypass the cache. + Note: + Pass ``cachebox__ignore=True`` at call-time to bypass the cache. + If *cache* isn't a lambda/function, these attributes will be attached to + your function: ``cache`` (property), ``cache_info`` (callable), ``clear_cache`` (callable), + and ``callback`` (property). Examples:: @@ -642,5 +647,66 @@ def decorator(func: FT) -> FT: def is_cached(func: object) -> bool: - """Return ``True`` if *func* was decorated with :func:`cached`.""" + """ + Return ``True`` if *func* was decorated with :func:`cached`. + + Args: + func: an object or function to check. + """ return hasattr(func, "cache") and isinstance(func.cache, BaseCacheImpl) # type: ignore[union-attr] + + +def get_cached_cache(cached_func: object) -> BaseCacheImpl: + """ + A way to get ``cached_func.cache``, without type-hint warnings. + + Args: + cached_func: a function decorated with :func:`cached`. + + Warning: + If *func* wasn't decorated with :func:`cached`, or you passed a lambda/function as *cache* + to :func:`cached` decorator, raises ``AttributeError``. 
+ """ + return cached_func.cache # type: ignore + + +def get_cached_cache_info(cached_func: object) -> CacheInfo: + """ + A way to get ``cached_func.cache_info()``, without type-hint warnings. + + Args: + cached_func: a function decorated with :func:`cached`. + + Warning: + If *func* wasn't decorated with :func:`cached`, or you passed a lambda/function as *cache* + to :func:`cached` decorator, raises ``AttributeError``. + """ + return cached_func.cache_info() # type: ignore + + +def get_cached_callback(cached_func: object) -> _Callback | None: + """ + A way to get ``cached_func.callback``, without type-hint warnings. + + Args: + cached_func: a function decorated with :func:`cached`. + + Warning: + If *func* wasn't decorated with :func:`cached`, or you passed a lambda/function as *cache* + to :func:`cached` decorator, raises ``AttributeError``. + """ + return cached_func.callback # type: ignore + + +def clear_cached_cache(cached_func: object) -> BaseCacheImpl: + """ + A way to call ``cached_func.cache_clear()``, without type-hint warnings. + + Args: + cached_func: a function decorated with :func:`cached`. + + Warning: + If *func* wasn't decorated with :func:`cached`, or you passed a lambda/function as *cache* + to :func:`cached` decorator, raises ``AttributeError``. 
+ """ + return cached_func.cache_clear() # type: ignore diff --git a/requirements-dev.txt b/requirements-dev.txt index e05d79c..da5f4df 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -2,4 +2,4 @@ maturin pytest hypothesis pytest-benchmark -pygal +pytest-asyncio diff --git a/tests/test_utils.py b/tests/test_utils.py index 4640904..5ef6ace 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1 +1,406 @@ -# TODO +import asyncio +import time +import typing + +import pytest + +import cachebox + + +@pytest.fixture( + scope="function", + params=[ + cachebox.Cache, + cachebox.FIFOCache, + cachebox.LFUCache, + cachebox.LRUCache, + cachebox.TTLCache, + cachebox.RRCache, + cachebox.VTTLCache, + ], +) +def random_cache_impl(request): + typ: typing.Type[cachebox.BaseCacheImpl] = request.param + + def inner(maxsize, iterable=None): + if typ is cachebox.TTLCache: + return typ(maxsize, global_ttl=10, iterable=iterable) + + if typ is cachebox.VTTLCache: + return typ(maxsize, ttl=10, iterable=iterable) + + return typ(maxsize, iterable=iterable) + + return inner + + +def test_frozen(random_cache_impl: type[cachebox.BaseCacheImpl]): + cache = random_cache_impl(10, {i: i for i in range(8)}) + f = cachebox.Frozen(cache) + + assert f.maxsize == cache.maxsize + + with pytest.raises(TypeError): + f[0] = 0 + + with pytest.raises(TypeError): + f.pop(0) + + with pytest.raises(TypeError): + f.popitem() + + assert len(f) == 8 + assert len(f) == len(cache) + cache.insert(9, 9) + assert len(f) == 9 + assert len(f) == len(cache) + + f = cachebox.Frozen(cache, ignore=True) + f.popitem() + + +def test_cached(random_cache_impl: type[cachebox.BaseCacheImpl]): + obj = random_cache_impl(3) + + @cachebox.cached(obj) + def factorial(n: int): + fact = 1 + for num in range(2, n + 1): + fact *= num + + time.sleep(0.1) + return fact + + perf_1 = time.perf_counter() + factorial(15) + perf_1 = time.perf_counter() - perf_1 + + assert cachebox.get_cached_cache_info(factorial).length == 
1 + assert cachebox.get_cached_cache_info(factorial).misses == 1 + + perf_2 = time.perf_counter() + factorial(15) + perf_2 = time.perf_counter() - perf_2 + + assert perf_1 > perf_2 + assert cachebox.get_cached_cache_info(factorial).hits == 1 + + cachebox.clear_cached_cache(factorial) + assert cachebox.get_cached_cache_info(factorial).hits == 0 + assert cachebox.get_cached_cache_info(factorial).misses == 0 + + perf_3 = time.perf_counter() + factorial(15) + perf_3 = time.perf_counter() - perf_3 + assert perf_3 > perf_2 + + # test cachebox__ignore + cachebox.clear_cached_cache(factorial) + assert len(cachebox.get_cached_cache(factorial)) == 0 + factorial(15, cachebox__ignore=True) # type: ignore + assert len(cachebox.get_cached_cache(factorial)) == 0 + + +def test_key_makers(random_cache_impl: type[cachebox.BaseCacheImpl]): + @cachebox.cached(random_cache_impl(125), key_maker=cachebox.make_key) + def func_1(a, b, c): + return a, b, c + + func_1(1, 2, 3) + func_1(1.0, 2, 3.0) + func_1(3, 2, 1) + + assert len(cachebox.get_cached_cache(func_1)) == 2 + + @cachebox.cached(random_cache_impl(125), key_maker=cachebox.make_typed_key) + def func_2(a, b, c): + return a, b, c + + func_2(1, 2, 3) + func_2(1.0, 2, 3.0) + func_2(3, 2, 1) + + assert len(cachebox.get_cached_cache(func_2)) == 3 + + +@pytest.mark.asyncio +async def test_async_cached(random_cache_impl: type[cachebox.BaseCacheImpl]): + obj = random_cache_impl(3) + + @cachebox.cached(obj) + async def factorial(n: int, _: str): + fact = 1 + for num in range(2, n + 1): + fact *= num + + await asyncio.sleep(0.1) # need for testing + return fact + + perf_1 = time.perf_counter() + await factorial(15, "cachebox") + perf_1 = time.perf_counter() - perf_1 + + assert cachebox.get_cached_cache_info(factorial).length == 1 + assert cachebox.get_cached_cache_info(factorial).misses == 1 + + perf_2 = time.perf_counter() + await factorial(15, "cachebox") + perf_2 = time.perf_counter() - perf_2 + + assert perf_1 > perf_2 + assert 
cachebox.get_cached_cache_info(factorial).hits == 1 + + cachebox.clear_cached_cache(factorial) + assert cachebox.get_cached_cache_info(factorial).hits == 0 + assert cachebox.get_cached_cache_info(factorial).misses == 0 + + perf_3 = time.perf_counter() + await factorial(15, "cachebox") + perf_3 = time.perf_counter() - perf_3 + assert perf_3 > perf_2 + + # test cachebox__ignore + cachebox.clear_cached_cache(factorial) + assert len(cachebox.get_cached_cache(factorial)) == 0 + await factorial(15, "me", cachebox__ignore=True) # type: ignore + assert len(cachebox.get_cached_cache(factorial)) == 0 + + +def test_cachedmethod(): + class TestCachedMethod: + def __init__(self, num) -> None: + self.num = num + + @cachebox.cached(None) + def method(self, char: str): + assert type(self) is TestCachedMethod + return char * self.num + + cls = TestCachedMethod(10) + assert cls.method("a") == ("a" * 10) + + cls = TestCachedMethod(2) + assert cls.method("a") == ("a" * 2) + + +def test_callback(random_cache_impl: type[cachebox.BaseCacheImpl]): + obj = random_cache_impl(3) + + called = list() + + @cachebox.cached( + obj, + key_maker=lambda n: n, + callback=lambda event, key, value: called.append((event, key, value)), + ) + def factorial(n: int, /): + fact = 1 + for num in range(2, n + 1): + fact *= num + + return fact + + assert factorial(5) == 120 + assert len(called) == 1 + assert called[0] == (cachebox.EVENT_MISS, 5, 120) + + assert factorial(5) == 120 + assert len(called) == 2 + assert called[1] == (cachebox.EVENT_HIT, 5, 120) + + assert factorial(3) == 6 + assert len(called) == 3 + assert called[2] == (cachebox.EVENT_MISS, 3, 6) + + assert cachebox.is_cached(factorial) + + +@pytest.mark.asyncio +async def test_async_cachedmethod(random_cache_impl: type[cachebox.BaseCacheImpl]): + class TestCachedMethod: + def __init__(self, num) -> None: + self.num = num + + @cachebox.cached(random_cache_impl(0)) + async def method(self, char: str): + assert type(self) is TestCachedMethod + return 
char * self.num + + cls = TestCachedMethod(10) + assert (await cls.method("a")) == ("a" * 10) + + +@pytest.mark.asyncio +async def test_async_callback(random_cache_impl: type[cachebox.BaseCacheImpl]): + obj = random_cache_impl(3) + + called = list() + + async def _callback(event, key, value): + called.append((event, key, value)) + + @cachebox.cached(obj, key_maker=lambda n: n, callback=_callback) + async def factorial(n: int, /): + fact = 1 + for num in range(2, n + 1): + fact *= num + + return fact + + assert await factorial(5) == 120 + assert len(called) == 1 + assert called[0] == (cachebox.EVENT_MISS, 5, 120) + + assert await factorial(5) == 120 + assert len(called) == 2 + assert called[1] == (cachebox.EVENT_HIT, 5, 120) + + assert await factorial(3) == 6 + assert len(called) == 3 + assert called[2] == (cachebox.EVENT_MISS, 3, 6) + + assert cachebox.is_cached(factorial) + assert not cachebox.is_cached(_callback) + + +def test_classmethod(): + class MyClass: + def __init__(self, num: int) -> None: + self.num = num + + @classmethod + @cachebox.cached(None, postprocess=cachebox.postprocess_copy) + def new(cls, num: int): + return cls(num) + + a = MyClass.new(1) + assert isinstance(a, MyClass) and a.num == 1 + + +def test_staticmethod(): + class MyClass: + def __init__(self, num: int) -> None: + self.num = num + + @staticmethod + @cachebox.cached(None, postprocess=cachebox.postprocess_copy) + def new(num: int): + return num + + a = MyClass.new(1) + assert isinstance(a, int) and a == 1 + + +def test_cached_method(random_cache_impl: type[cachebox.BaseCacheImpl]): + class Test: + def __init__(self, num) -> None: + self.num = num + self._cache = random_cache_impl(20) + + @cachebox.cached(lambda self: self._cache) + def method(self, char: str): + assert type(self) is Test + return char * self.num + + for i in range(10): + cls = Test(i) + assert cls.method("a") == ("a" * i) + + +def test_nested_cached_shared_cache(random_cache_impl: type[cachebox.BaseCacheImpl]): + obj = 
random_cache_impl(10) + + @cachebox.cached(obj, key_maker=cachebox.make_typed_key) + def func_inner(a: int, b: int): + return a + b + + @cachebox.cached( + obj, + # `key_maker`s should be different + key_maker=cachebox.make_key, + ) + def func_outer(a: int, b: int): + return f"{a} + {b} = {func_inner(a, b)}" + + assert func_outer(1, 2) == "1 + 2 = 3" + assert func_outer(1, 2) == "1 + 2 = 3" + assert func_outer(1, 2) == "1 + 2 = 3" + assert func_outer(1, 2) == "1 + 2 = 3" + assert func_outer(2, 3) == "2 + 3 = 5" + assert func_outer(a=2, b=3) == "2 + 3 = 5" + + +def test_recursive_cached(random_cache_impl: type[cachebox.BaseCacheImpl]): + obj = random_cache_impl(10) + + @cachebox.cached(obj) + def factorial(n): + if n < 0: + raise ValueError + if n == 0 or n == 1: + return 1 + else: + return n * factorial(n - 1) + + assert factorial(10) == 3628800 + assert factorial(5) == 120 + assert factorial(10) == 3628800 + assert factorial(5) == 120 + assert factorial(10) == 3628800 + assert factorial(2) == 2 + + +def test_recursive_threading_cached(): + import threading + + obj = cachebox.LRUCache(10) + + @cachebox.cached(obj) + def factorial(n): + if n < 0: + raise ValueError + if n == 0 or n == 1: + return 1 + else: + return n * factorial(n - 1) + + threads = list( + map( + lambda x: x.start() or x, + ( + threading.Thread(target=factorial, args=(10,), name=str(i)) + for i in range(10) + ), + ) + ) + for t in threads: + t.join(timeout=60) + + +@pytest.mark.asyncio +async def test_recursive_asyncio_cached(): + obj = cachebox.LRUCache(10) + + @cachebox.cached(obj) + async def factorial(n) -> int: + if n < 0: + raise ValueError + if n == 0 or n == 1: + return 1 + else: + return n * (await factorial(n - 1)) + + result = await asyncio.wait_for( + asyncio.gather( + factorial(10), + factorial(10), + factorial(10), + factorial(10), + factorial(10), + factorial(10), + factorial(10), + factorial(10), + ), + 10, + ) + assert result == ([3628800] * 8) From 
e0864082d7f88617f94b67c5aba85beb2a76dd58 Mon Sep 17 00:00:00 2001 From: awolverp Date: Sun, 31 May 2026 11:55:50 +0330 Subject: [PATCH 43/60] Update README.md --- README.md | 736 +----------------------------------------------------- 1 file changed, 12 insertions(+), 724 deletions(-) diff --git a/README.md b/README.md index 0e01c8c..b1acb5a 100644 --- a/README.md +++ b/README.md @@ -4,19 +4,20 @@ *The fastest caching Python library written in Rust* -[**Releases**](https://github.com/awolverp/cachebox/releases) | +[**Documentation**](https://awolverp.github.com/cachebox) | [**Releases**](https://github.com/awolverp/cachebox/releases) | [**Benchmarks**](https://github.com/awolverp/cachebox-benchmark) | [**Issues**](https://github.com/awolverp/cachebox/issues/new) [![License](https://img.shields.io/github/license/awolverp/cachebox.svg?style=flat-square)](https://github.com/awolverp/cachebox/blob/main/LICENSE) -[![Release](https://img.shields.io/github/v/release/awolverp/cachebox.svg?style=flat-square)](https://github.com/awolverp/cachebox/releases) -[![Python Versions](https://img.shields.io/pypi/pyversions/cachebox.svg?style=flat-square)](https://pypi.org/project/cachebox/) [![Downloads](https://img.shields.io/pypi/dm/cachebox?style=flat-square&color=%23314bb5)](https://pepy.tech/projects/cachebox) ------- +> [!WARNING]\ +> The new version v6 has incompatibilities with v5. For more info see [Incompatible changes](#incompatible-changes). + ### What does it do? You can easily perform powerful caching operations in Python as fast as possible. This can make your application a lot faster and it can be a good choice in complex applications. 
@@ -24,23 +25,14 @@ This can make your application a lot faster and it can be a good choice in compl **Key Features:** - 🚀 Extremely fast (10-50x faster than other caching libraries -- [*benchmarks*](https://github.com/awolverp/cachebox-benchmark)) -- 📊 Minimal memory footprint (50% of standard dictionary memory usage) +- 📊 Minimal memory footprint - 🔥 Full-featured and user-friendly - 🧶 Completely thread-safe - 🔧 Tested and correct - **\[R\]** written in Rust for maximum performance -- 🤝 Compatible with Python 3.9+ (PyPy and CPython) +- 🤝 Compatible with Python 3.10+ (PyPy and CPython) - 📦 Supports 7 advanced caching algorithms -### Page Contents -- ❓ [**When do I need caching and `cachebox`?**](#when-do-i-need-caching-and-cachebox) -- 🌟 [**Why `cachebox`?**](#why-cachebox) -- 🔧 [**Installation**](#installation) -- 💡 [**Preview**](#examples) -- 🎓 [**Getting started**](#getting-started) -- ✏️ [**Incompatible changes**](#%EF%B8%8F-incompatible-changes) -- 📌 [**Tips & Notes**](#tips-and-notes) - ### When do I need caching and `cachebox`? - 📈 **Frequent Data Access** \ If you need to access the same data multiple times, caching can help reduce the number of database queries or API calls, improving performance. @@ -65,19 +57,19 @@ This can make your application a lot faster and it can be a good choice in compl It uses the *Rust* language for high-performance. - **🧮 SwissTable** \ -It uses Google's high-performance SwissTable hash map. Credit to [hashbrown](https://github.com/rust-lang/hashbrown). +It uses Google's high-performance SwissTable hash map. Thanks to [hashbrown](https://github.com/rust-lang/hashbrown). - **✨ Low memory usage** \ It has very low memory usage. - **⭐ Zero Dependency** \ -As we said, `cachebox` is written in Rust so you don't have to install any other dependecies. +As we said, `cachebox` is written in *Rust* so you don't have to install any other dependecies. - **🧶 Thread safe** \ -It's completely thread-safe and uses locks to prevent problems. 
+It's completely thread-safe and uses *Rust* mutex to prevent problems. - **👌 Easy To Use** \ -You only need to import it and choose a cache implementation to use. It will behave like a dictionary. +You only need to import it and choose a cache implementation to use. - **🚫 Avoids Cache Stampede** \ It avoids [cache stampede](https://en.wikipedia.org/wiki/Cache_stampede) by using a distributed lock system. @@ -89,15 +81,11 @@ cachebox is installable via `pip`: pip3 install -U cachebox ``` -> [!WARNING]\ -> The new version v5 has some incompatibilities with v4. For more info see [Incompatible changes](#incompatible-changes). - ## Examples The simplest example of **cachebox** could look like this: ```python import cachebox -# Like functools.lru_cache, If maxsize is set to 0, the cache can grow without bounds and limit. @cachebox.cached(cachebox.FIFOCache(maxsize=128)) def factorial(number: int) -> int: fact = 1 @@ -106,7 +94,6 @@ def factorial(number: int) -> int: return fact assert factorial(5) == 125 -assert len(factorial.cache) == 1 # coroutines are also supported @cachebox.cached(cachebox.LRUCache(maxsize=128)) @@ -142,707 +129,8 @@ assert cache["key"] == "value" assert cache.get("key") == "value" ``` -## Getting started -There are 3 useful functions: -- [**cached**](#cached--decorator): a decorator that helps you to cache your functions and calculations with a lot of options. -- [**is_cached**](#is_cached--function): check if a function/method cached by cachebox or not - -And 9 classes: -- [**BaseCacheImpl**](#basecacheimpl-️-class): base-class for all classes. -- [**Cache**](#cache-️-class): A simple cache that has no algorithm; this is only a hashmap. -- [**FIFOCache**](#fifocache-️-class): the FIFO cache will remove the element that has been in the cache the longest. -- [**RRCache**](#rrcache-️-class): the RR cache will remove a random element to make free up space when necessary. 
-- [**LRUCache**](#lrucache-️-class): the LRU cache will remove the element in the cache that has not been accessed in the longest time. -- [**LFUCache**](#lfucache-️-class): the LFU cache will remove the element in the cache that has been accessed the least often, regardless of time. -- [**TTLCache**](#ttlcache-️-class): the TTL cache will automatically remove the element in the cache that has expired. -- [**VTTLCache**](#vttlcache-️-class): the TTL cache will automatically remove the element in the cache that has expired when needed. -- [**Frozen**](#frozen-️-class): you can use this class for freezing your caches. - -You only need to import the classes you want and can work with them like a regular dictionaries (except for [VTTLCache](#vttlcache-️-class), this have some differences). - -The examples below will introduce you to these different features. -**All the methods in the examples are common across all classes (exceptions are noted where applicable).** - -* * * - -### `cached` (🎀 decorator) -Decorator to wrap a function with a memoizing callable that saves results in a cache. - -**Parameters:** -- `cache`: Specifies a cache that handles and stores the results. if `None` or `dict`, `FIFOCache` will be used. - -- `key_maker`: Specifies a function that will be called with the same positional and keyword - arguments as the wrapped function itself. It has to return a suitable cache key - (must be hashable). - -- `clear_reuse`: The wrapped function has a function named `clear_cache` that uses `cache.clear` - method to clear the cache. This parameter will be passed to cache's `clear` method. - -- `callback`: Every time the `cache` is used, callback is also called. - The callback arguments are: event number (see `EVENT_MISS` or `EVENT_HIT` variables), key, and then result. - -- `copy_level`: The wrapped function always copies the result of your function and then returns it. - This parameter specifies how the result is copied before returning it. 
- `0` means "never copy", `1` means "only copy `dict`, `list`, and `set` results" and - `2` means "always copy the results". Defaults to 1. - -

-Examples - - -A simple example: -```python -import cachebox - -@cachebox.cached(cachebox.LRUCache(128)) -def sum_as_string(a, b): - return str(a+b) - -assert sum_as_string(1, 2) == "3" - -assert len(sum_as_string.cache) == 1 -sum_as_string.cache_clear() -assert len(sum_as_string.cache) == 0 -``` - -A `key_maker` example: -```python -import cachebox - -def simple_key_maker(args: tuple, kwds: dict): - return args[0].path - -# Async methods are supported -@cachebox.cached(cachebox.LRUCache(128), key_maker=simple_key_maker) -async def request_handler(request: Request): - return Response("hello man") -``` - -A typed `key_maker` example using a predefined key function: -```python -import cachebox - -@cachebox.cached(cachebox.LRUCache(128), key_maker=cachebox.make_typed_key) -def sum_as_string(a, b): - return str(a+b) - -sum_as_string(1.0, 1) -sum_as_string(1, 1) -print(len(sum_as_string.cache)) # 2 -``` - -You have the option to manage caches with `.cache` attribute as shown in previous examples. 
-There are more attributes and methods you can use: -```python -import cachebox - -@cachebox.cached(cachebox.LRUCache(0)) -def sum_as_string(a, b): - return str(a+b) - -print(sum_as_string.cache) -# LRUCache(0 / 9223372036854775807, capacity=0) - -print(sum_as_string.cache_info()) -# CacheInfo(hits=0, misses=0, maxsize=9223372036854775807, length=0, memory=8) - -# `.cache_clear()` clears the cache -sum_as_string.cache_clear() -``` - -method example: *(Added in v5.1.0)* -```python -import cachebox - -class Example: - def __init__(self, num) -> None: - self.num = num - self._cache = cachebox.TTLCache(20, 10) - - @cachebox.cached(lambda self: self._cache) - def method(self, char: str): - return char * self.num - -ex = Example(10) -assert ex.method("a") == "a" * 10 -``` - -`callback` example: *(Added in v4.2.0)* -```python -import cachebox - -def callback_func(event: int, key, value): - if event == cachebox.EVENT_MISS: - print("callback_func: miss event", key, value) - elif event == cachebox.EVENT_HIT: - print("callback_func: hit event", key, value) - else: - # unreachable code - raise NotImplementedError - -@cachebox.cached(cachebox.LRUCache(0), callback=callback_func) -def func(a, b): - return a + b - -assert func(1, 2) == 3 -# callback_func: miss event (1, 2) 3 - -assert func(1, 2) == 3 # hit -# callback_func: hit event (1, 2) 3 - -assert func(1, 2) == 3 # hit again -# callback_func: hit event (1, 2) 3 - -assert func(5, 4) == 9 -# callback_func: miss event (5, 4) 9 -``` - -
- -> [!TIP]\ -> There's a new feature **since `v4.1.0`** for making a cached function not use cache for a call: -> ```python -> # with `cachebox__ignore=True` parameter, cachebox does not use cache and directly calls the function, returning its result. -> sum_as_string(10, 20, cachebox__ignore=True) -> ``` - -* * * - -### `cachedmethod` (🎀 decorator) -This decorator works excatly like `cached()`, but ignores `self` parameters in hashing and key making. - -> [!WARNING]\ -> This function has been deprecated since `v5.1.0`, use `cached` function instead. - -
-Example - -```python -import cachebox - -class MyClass: - @cachebox.cachedmethod(cachebox.TTLCache(0, ttl=10)) - def my_method(self, name: str): - return "Hello, " + name + "!" - -c = MyClass() -c.my_method() -``` - -
- -* * * - -### `is_cached` (📦 function) -Checks whether a function/method is cached by cachebox or not. - -**Parameters:** -- `func`: The function/method to check. - -
-Example - -```python -import cachebox - -@cachebox.cached(cachebox.FIFOCache(0)) -def func(): - pass - -assert cachebox.is_cached(func) -``` - -
- -* * * - -### `BaseCacheImpl` (🏗️ class) -Base implementation for cache classes in the cachebox library. - -This abstract base class defines the generic structure for cache implementations, -supporting different key and value types through generic type parameters. -Serves as a foundation for specific cache variants like Cache and FIFOCache. - -
-Example - -```python -import cachebox - -# subclass -class ClassName(cachebox.BaseCacheImpl): - ... - -# type-hint -def func(cache: BaseCacheImpl): - ... - -# isinstance -cache = cachebox.LFUCache(0) -assert isinstance(cache, cachebox.BaseCacheImpl) -``` - -
- -* * * - -### `Cache` (🏗️ class) -A thread-safe, memory-efficient hashmap-like cache with configurable maximum size. - -Provides a flexible key-value storage mechanism with: -- Configurable maximum size (zero means unlimited) -- Lower memory usage compared to standard dict -- Thread-safe operations -- Useful memory management methods - -Supports initialization with optional initial data and capacity -and provides dictionary-like access with additional cache-specific operations. - -> [!TIP]\ -> Differs from standard `dict` by: -> - being thread-safe and unordered, while dict isn't thread-safe and ordered (Python 3.6+). -> - using much less memory than dict. -> - supporting useful and new methods for managing memory, while dict does not. -> - **not supporting** `popitem()`, while dict does. -> - an option to limit the size of `Cache` which dict doesn't support. - -| | get | insert | delete | popitem | -| ------------ | ----- | ------- | ------ | ------- | -| Worse-case | O(1) | O(1) | O(1) | N/A | - -
-Example - -```python -from cachebox import Cache - -# These parameters are common in classes: -# `maxsize` specifies the limit size of the cache (zero means infinity); this is unchangable. -# `iterable` allows creating a cache from a dict or an iterable. -# `capacity` will make the cache attempt to allocate a new hash table with at -# least enough capacity for inserting the given number of elements without reallocating. -cache = Cache(maxsize=100, iterable=None, capacity=100) - -# behaves like a regular dict -cache["key"] = "value" -# using `.insert(key, value)` is recommended -cache.insert("key", "value") - -print(cache["key"]) # value - -del cache["key"] -cache["key"] # KeyError: key - -# cachebox.Cache does not have any policy, so will raise OverflowError if the capacity is exceeded -cache.update({i:i for i in range(200)}) -# OverflowError: The cache has reached the bound. -``` - -
- -* * * - -### `FIFOCache` (🏗️ class) -A First-In-First-Out (FIFO) cache implementation with configurable maximum size and optional initial capacity. - -This cache provides a fixed-size container that automatically removes the oldest items when the maximum size is reached. - -**Key features**: -- Deterministic item eviction order (oldest items removed first) -- Efficient key-value storage and retrieval -- Supports dictionary-like operations -- Allows optional initial data population - -| | get | insert | delete | popitem | -| ------------ | ----- | ------- | ------------- | ------- | -| Worse-case | O(1) | O(1) | O(min(i, n-i)) | O(1) | - -
-Example - -```python -from cachebox import FIFOCache - -cache = FIFOCache(5, {i:i*2 for i in range(5)}) - -print(len(cache)) # 5 -cache["new-key"] = "new-value" -print(len(cache)) # 5 - -print(cache.get(3, "default-val")) # 6 -print(cache.get(6, "default-val")) # default-val - -print(cache.popitem()) # (1, 2) - -# insert method returns a value: -# - If the cache did not have this key present, None is returned. -# - If the cache did have this key present, the value is updated, and the old value is returned. -print(cache.insert(3, "val")) # 6 -print(cache.insert("new-key", "val")) # None - -# Returns the first key in cache; this is the one which will be removed by `popitem()`. -print(cache.first()) -``` - -
- -* * * - -### `RRCache` (🏗️ class) -A thread-safe cache implementation with Random Replacement (RR) policy. - -This cache randomly selects and removes elements when the cache reaches its maximum size, -ensuring a simple and efficient caching mechanism with configurable capacity. - -Supports operations like insertion, retrieval, deletion, and iteration with O(1) complexity. - -| | get | insert | delete | popitem | -| ------------ | ----- | ------- | ------ | ------- | -| Worse-case | O(1) | O(1) | O(1) | O(1) | - -
-Example - -```python -from cachebox import RRCache - -cache = RRCache(10, {i:i for i in range(10)}) -print(cache.is_full()) # True -print(cache.is_empty()) # False - -# Returns the number of elements the map can hold without reallocating. -print(cache.capacity()) # 28 - -# Shrinks the cache to fit len(self) elements. -cache.shrink_to_fit() -print(cache.capacity()) # 10 - -# Returns a random key -print(cache.random_key()) # 4 -``` - -
- -* * * - -### `LRUCache` (🏗️ class) -Thread-safe Least Recently Used (LRU) cache implementation. - -Provides a cache that automatically removes the least recently used items when -the cache reaches its maximum size. Supports various operations like insertion, -retrieval, and management of cached items with configurable maximum size and -initial capacity. - -| | get | insert | delete(i) | popitem | -| ------------ | ----- | ------- | --------- | ------- | -| Worse-case | O(1)~ | O(1)~ | O(1)~ | O(1)~ | - -
-Example - -```python -from cachebox import LRUCache - -cache = LRUCache(0, {i:i*2 for i in range(10)}) - -# access `1` -print(cache[0]) # 0 -print(cache.least_recently_used()) # 1 -print(cache.popitem()) # (1, 2) - -# .peek() searches for a key-value in the cache and returns it without moving the key to recently used. -print(cache.peek(2)) # 4 -print(cache.popitem()) # (3, 6) - -# Does the `popitem()` `n` times and returns count of removed items. -print(cache.drain(5)) # 5 -``` - -
- -* * * - -### `LFUCache` (🏗️ class) -A thread-safe Least Frequently Used (LFU) cache implementation. - -This cache removes elements that have been accessed the least number of times, -regardless of their access time. It provides methods for inserting, retrieving, -and managing cache entries with configurable maximum size and initial capacity. - -| | get | insert | delete(i) | popitem | -| ------------ | ----- | ------- | --------- | ------- | -| Worse-case | O(1)~ | O(1)~ | O(min(i, n-i)) | O(1)~ | - -
-Example - -```python -from cachebox import LFUCache - -cache = cachebox.LFUCache(5) -cache.insert('first', 'A') -cache.insert('second', 'B') - -# access 'first' twice -cache['first'] -cache['first'] - -# access 'second' once -cache['second'] - -assert cache.least_frequently_used() == 'second' -assert cache.least_frequently_used(2) is None # 2 is out of range - -for item in cache.items_with_frequency(): - print(item) -# ('second', 'B', 1) -# ('first', 'A', 2) -``` - -
- -* * * - -### `TTLCache` (🏗️ class) -A thread-safe Time-To-Live (TTL) cache implementation with configurable maximum size and expiration. - -This cache automatically removes elements that have expired based on their time-to-live setting. -Supports various operations like insertion, retrieval, and iteration. - -| | get | insert | delete(i) | popitem | -| ------------ | ----- | ------- | --------- | ------- | -| Worse-case | O(1)~ | O(1)~ | O(min(i, n-i)) | O(n) | - -
-Example - -```python -from cachebox import TTLCache -import time - -# The `ttl` param specifies the time-to-live value for each element in cache (in seconds); cannot be zero or negative. -cache = TTLCache(0, ttl=2) -cache.update({i:str(i) for i in range(10)}) - -print(cache.get_with_expire(2)) # ('2', 1.99) - -# Returns the oldest key in cache; this is the one which will be removed by `popitem()` -print(cache.first()) # 0 - -cache["mykey"] = "value" -time.sleep(2) -cache["mykey"] # KeyError -``` - -
- -* * * - -### `VTTLCache` (🏗️ class) -A thread-safe, time-to-live (TTL) cache implementation with per-key expiration policy. - -This cache allows storing key-value pairs with optional expiration times. When an item expires, -it is automatically removed from the cache. The cache supports a maximum size and provides -various methods for inserting, retrieving, and managing cached items. - -Key features: -- Per-key time-to-live (TTL) support -- Configurable maximum cache size -- Thread-safe operations -- Automatic expiration of items - -Supports dictionary-like operations such as get, insert, update, and iteration. - -| | get | insert | delete(i) | popitem | -| ------------ | ----- | ------- | --------- | ------- | -| Worse-case | O(1)~ | O(1)~ | O(min(i, n-i)) | O(1)~ | - -> [!TIP]\ -> `VTTLCache` vs `TTLCache`: -> - In `VTTLCache` each item has its own unique time-to-live, unlike `TTLCache`. -> - `VTTLCache` is generally slower than `TTLCache`. - -
-Example - -```python -from cachebox import VTTLCache -import time - -# The `ttl` param specifies the time-to-live value for `iterable` (in seconds); cannot be zero or negative. -cache = VTTLCache(100, iterable={i:i for i in range(4)}, ttl=3) -print(len(cache)) # 4 -time.sleep(3) -print(len(cache)) # 0 - -# The "key1" is exists for 5 seconds -cache.insert("key1", "value", ttl=5) -# The "key2" is exists for 2 seconds -cache.insert("key2", "value", ttl=2) - -time.sleep(2) -# "key1" is exists for 3 seconds -print(cache.get("key1")) # value - -# "key2" has expired -print(cache.get("key2")) # None -``` - -
- -* * * - -### `Frozen` (🏗️ class) -**This is not a cache**; This is a wrapper class that prevents modifications to an underlying cache implementation. - -This class provides a read-only view of a cache, optionally allowing silent -suppression of modification attempts instead of raising exceptions. - -
-Example - -```python -from cachebox import Frozen, FIFOCache - -cache = FIFOCache(10, {1:1, 2:2, 3:3}) - -# parameters: -# cls: your cache -# ignore: If False, will raise TypeError if anyone try to change cache. will do nothing otherwise. -frozen = Frozen(cache, ignore=True) -print(frozen[1]) # 1 -print(len(frozen)) # 3 - -# Frozen ignores this action and do nothing -frozen.insert("key", "value") -print(len(frozen)) # 3 - -# Let's try with ignore=False -frozen = Frozen(cache, ignore=False) - -frozen.insert("key", "value") -# TypeError: This cache is frozen. -``` - -
- -> [!NOTE]\ -> The **Frozen** class can't prevent expiring in [TTLCache](#ttlcache) or [VTTLCache](#vttlcache). -> -> For example: -> ```python -> cache = TTLCache(0, ttl=3, iterable={i:i for i in range(10)}) -> frozen = Frozen(cache) -> -> time.sleep(3) -> print(len(frozen)) # 0 -> ``` - -## ⚠️ Incompatible Changes -These are changes that are not compatible with the previous version: - -**You can see more info about changes in [Changelog](CHANGELOG.md).** - -#### CacheInfo's cachememory attribute renamed! -The `CacheInfo.cachememory` was renamed to `CacheInfo.memory`. - -```python -@cachebox.cached({}) -def func(a: int, b: int) -> str: - ... - -info = func.cache_info() - -# Older versions -print(info.cachememory) - -# New version -print(info.memory) -``` - -#### Errors in the `__eq__` method will not be ignored! -Now the errors which occurred while doing `__eq__` operations will not be ignored. - -```python -class A: - def __hash__(self): - return 1 - - def __eq__(self, other): - raise NotImplementedError("not implemeneted") - -cache = cachebox.FIFOCache(0, {A(): 10}) - -# Older versions: -cache[A()] # => KeyError - -# New version: -cache[A()] -# Traceback (most recent call last): -# File "script.py", line 11, in -# cache[A()] -# ~~~~~^^^^^ -# File "script.py", line 7, in __eq__ -# raise NotImplementedError("not implemeneted") -# NotImplementedError: not implemeneted -``` - -#### Cache comparisons will not be strict! -In older versions, cache comparisons depended on the caching algorithm. Now, they work just like dictionary comparisons. - -```python -cache1 = cachebox.FIFOCache(10) -cache2 = cachebox.FIFOCache(10) - -cache1.insert(1, 'first') -cache1.insert(2, 'second') - -cache2.insert(2, 'second') -cache2.insert(1, 'first') - -# Older versions: -cache1 == cache2 # False - -# New version: -cache1 == cache2 # True -``` - -## Tips and Notes -#### How to save caches in files? 
-There's no built-in file-based implementation, but you can use `pickle` for saving caches in files. For example: -```python -import cachebox -import pickle -c = cachebox.LRUCache(100, {i:i for i in range(78)}) - -with open("file", "wb") as fd: - pickle.dump(c, fd) - -with open("file", "rb") as fd: - loaded = pickle.load(fd) - -assert c == loaded -assert c.capacity() == loaded.capacity() -``` - -> [!TIP]\ -> For more, see this [issue](https://github.com/awolverp/cachebox/issues/8). - -* * * - -#### How to copy the caches? -You can use `copy.deepcopy` or `cache.copy` for copying caches. For example: -```python -import cachebox -cache = cachebox.LRUCache(100, {i:i for i in range(78)}) - -# shallow copy -shallow = cache.copy() - -# deep copy -import copy -deep = copy.deepcopy(cache) -``` +## Learn more +Read the documentation for full information and learn more: [**Documentation**](https://awolverp.github.io/cachebox) ## License This repository is licensed under the [MIT License](LICENSE) From 68113563116410f729ff16833871add41e0633d5 Mon Sep 17 00:00:00 2001 From: awolverp Date: Sun, 31 May 2026 16:55:49 +0330 Subject: [PATCH 44/60] Write docs using mkdocs-material --- README.md | 2 +- cachebox/_core.pyi | 2 +- cachebox/utils.py | 9 +- docs/docs/api/impls.md | 153 +++++++++++++++++++++++ docs/docs/api/index.md | 0 docs/docs/api/utils.md | 1 + docs/docs/getting-started.md | 229 +++++++++++++++++++++++++++++++++++ docs/docs/index.md | 76 ++++++++++++ docs/docs/installation.md | 32 +++++ docs/docs/migration.md | 150 +++++++++++++++++++++++ docs/docs/tips.md | 180 +++++++++++++++++++++++++++ docs/mkdocs.yml | 96 +++++++++++++++ requirements-dev.txt | 2 + src/pyclasses/cache.rs | 59 ++++----- 14 files changed, 958 insertions(+), 33 deletions(-) create mode 100644 docs/docs/api/impls.md create mode 100644 docs/docs/api/index.md create mode 100644 docs/docs/api/utils.md create mode 100644 docs/docs/getting-started.md create mode 100644 docs/docs/index.md create mode 100644 
docs/docs/installation.md create mode 100644 docs/docs/migration.md create mode 100644 docs/docs/tips.md create mode 100644 docs/mkdocs.yml diff --git a/README.md b/README.md index b1acb5a..a424826 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ This can make your application a lot faster and it can be a good choice in compl - 🤝 Compatible with Python 3.10+ (PyPy and CPython) - 📦 Supports 7 advanced caching algorithms -### When do I need caching and `cachebox`? +### When do I need caching? - 📈 **Frequent Data Access** \ If you need to access the same data multiple times, caching can help reduce the number of database queries or API calls, improving performance. diff --git a/cachebox/_core.pyi b/cachebox/_core.pyi index 31020f0..d5d4d3d 100644 --- a/cachebox/_core.pyi +++ b/cachebox/_core.pyi @@ -72,7 +72,7 @@ class BaseCacheImpl(typing.Generic[KT, VT]): @property def getsizeof(self) -> typing.Callable[[KT, VT], int] | None: - """Callable or None: The configured ``getsizeof`` function.""" + """The configured ``getsizeof`` function.""" ... 
def current_size(self) -> int: diff --git a/cachebox/utils.py b/cachebox/utils.py index 0e513f1..6d500a4 100644 --- a/cachebox/utils.py +++ b/cachebox/utils.py @@ -370,7 +370,7 @@ async def __aexit__(self, *_) -> None: self._lock.release() -CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "length"]) +CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "size"]) EVENT_MISS = 1 EVENT_HIT = 2 @@ -559,7 +559,12 @@ async def _wrapped(*args, **kwds): if not cache_is_fn: _wrapped.cache = cache # type: ignore[attr-defined] - _wrapped.cache_info = lambda: CacheInfo(hits, misses, cache.maxsize, len(cache)) # type: ignore[attr-defined] + _wrapped.cache_info = lambda: CacheInfo( # type: ignore[attr-defined] + hits, + misses, + cache.maxsize, + cache.current_size(), + ) def cache_clear() -> None: nonlocal hits, misses diff --git a/docs/docs/api/impls.md b/docs/docs/api/impls.md new file mode 100644 index 0000000..09fe2a8 --- /dev/null +++ b/docs/docs/api/impls.md @@ -0,0 +1,153 @@ + +::: cachebox._core.BaseCacheImpl + options: + members: + - __init__ + - maxsize + - getsizeof + - current_size + - remaining_size + - capacity + - __len__ + - __contains__ + - contains + - is_empty + - is_full + - insert + - __setitem__ + - update + - get + - __getitem__ + - setdefault + - pop + - __delitem__ + - popitem + - drain + - shrink_to_fit + - clear + - __eq__ + - __ne__ + - items + - values + - keys + - __iter__ + - copy + - __repr__ + +::: cachebox._core.Cache + options: + members: + - insert + - update + - get + - setdefault + - pop + - popitem + - items + - values + - keys + +::: cachebox._core.FIFOCache + options: + members: + - insert + - update + - get + - setdefault + - pop + - popitem + - items + - values + - keys + - first + - last + +::: cachebox._core.RRCache + options: + members: + - insert + - update + - get + - setdefault + - pop + - popitem + - items + - values + - keys + +::: cachebox._core.LRUCache + options: + members: + - insert + - update 
+ - get + - setdefault + - pop + - popitem + - items + - values + - keys + - peek + - least_recently_used + - most_recently_used + +::: cachebox._core.LFUCache + options: + members: + - insert + - update + - get + - setdefault + - pop + - popitem + - items + - values + - keys + - items_with_frequency + - peek + - least_frequently_used + +::: cachebox._cachebox.TTLCache + options: + members: + - __init__ + - sweep_interval + - stop_sweeper + - global_ttl + - insert + - update + - get + - setdefault + - pop + - popitem + - items + - values + - keys + - first + - last + - expire + - get_with_expire + - pop_with_expire + - popitem_with_expire + - items_with_expire + +::: cachebox._cachebox.VTTLCache + options: + members: + - __init__ + - sweep_interval + - stop_sweeper + - insert + - update + - setdefault + - popitem + - items + - values + - keys + - first + - last + - expire + - get_with_expire + - pop_with_expire + - popitem_with_expire + - items_with_expire diff --git a/docs/docs/api/index.md b/docs/docs/api/index.md new file mode 100644 index 0000000..e69de29 diff --git a/docs/docs/api/utils.md b/docs/docs/api/utils.md new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/docs/docs/api/utils.md @@ -0,0 +1 @@ + diff --git a/docs/docs/getting-started.md b/docs/docs/getting-started.md new file mode 100644 index 0000000..ebdf123 --- /dev/null +++ b/docs/docs/getting-started.md @@ -0,0 +1,229 @@ +# Getting Started + +This guide walks you through the most common cachebox patterns. +All cache classes behave like Python dictionaries unless noted otherwise. 
+ +## Using the `@cached` Decorator +The simplest way to cache a function's return value: + +```python hl_lines="3" +import cachebox + +@cachebox.cached(cachebox.FIFOCache(maxsize=128)) +def factorial(number: int) -> int: + fact = 1 + for num in range(2, number + 1): + fact *= num + return fact + +assert factorial(5) == 120 +``` + +With the first parameter, `cache`, you specify the cache instance that should be used for caching. + +```python hl_lines="4" +import cachebox + +@cachebox.cached( + cachebox.LRUCache(maxsize=128), +) +def factorial(number: int) -> int: + fact = 1 + for num in range(2, number + 1): + fact *= num + return fact + +assert factorial(5) == 120 +``` + +### Async Functions + +Coroutines are supported out of the box: + +```python +import cachebox + +@cachebox.cached(cachebox.LRUCache(maxsize=128)) +async def make_request(method: str, url: str) -> dict: + response = await client.request(method, url) + return response.json() +``` + +### Using a Custom Key Maker +There are 3 ready-to-use key maker functions, and by default the `@cached` decorator uses the simplest one of them. + +You can use the ready-to-use functions, or create a custom one. + +=== "Standard way" + + ```python hl_lines="5" + import cachebox + + def path_key(request): + return request.path + + @cachebox.cached( + cachebox.LRUCache(128), + key_maker=path_key, + ) + async def request_handler(request): + return Response("hello") + ``` + +=== "Using `lambda`" + + ```python hl_lines="5" + import cachebox + + @cachebox.cached( + cachebox.LRUCache(128), + key_maker=lambda request: request.path, + ) + async def request_handler(request): + return Response("hello") + ``` + +The ready-to-use key makers are: + +- [make_key function](cachebox.utils.make_key) +- [make_typed_key function](cachebox.utils.make_typed_key) +- [make_hash_key function](cachebox.utils.make_hash_key) + + +### Callbacks on Cache Events +The `@cached` decorator supports a callback on every hit/miss, using the `callback` parameter. 
+ +```python hl_lines="11" +import cachebox + +def on_cache_event(event: int, key, value): + if event == cachebox.EVENT_MISS: + print(f"MISS key={key}") + elif event == cachebox.EVENT_HIT: + print(f"HIT key={key}") + +@cachebox.cached( + cachebox.LRUCache(0), + callback=on_cache_event, +) +def add(a, b): + return a + b + +add(1, 2) # MISS key=(1, 2) +add(1, 2) # HIT key=(1, 2) +``` + +!!! tip + + The callback may be a coroutine in async contexts. + + +### Setting a Postprocessor +The `@cached` decorator also supports postprocessors, using the `postprocess` parameter. +A postprocessor is a transformer which is applied before returning a result to the caller. + +There are 4 ready-to-use postprocessor functions, and by default the `@cached` decorator uses the +[`postprocess_copy_mutable` function](cachebox.utils.postprocess_copy_mutable). + +```python hl_lines="9" +import cachebox + +def postprocess(result): + print(f"RESULT: {result}") + return result + +@cachebox.cached( + cachebox.LRUCache(0), + postprocess=postprocess, +) +def add(a, b): + return a + b + +add(1, 2) # RESULT: 3 +``` + +The ready-to-use postprocessors are: + +- [postprocess_copy function](cachebox.utils.postprocess_copy) +- [postprocess_copy_mutable function](cachebox.utils.postprocess_copy_mutable) +- [postprocess_deepcopy function](cachebox.utils.postprocess_deepcopy) +- [postprocess_deepcopy_mutable function](cachebox.utils.postprocess_deepcopy_mutable) + +!!! note + + Added in v6.0.0; the `copy_level` parameter was removed in favor of this feature. 
+ +### Bypass the Cache for a Single Call +Pass `cachebox__ignore=True` to skip the cache entirely: + +```python +result = my_func(10, 20, cachebox__ignore=True) +``` + +### Cache on an Instance Method + +```python hl_lines="6 8" +import cachebox + +class MyService: + def __init__(self, multiplier: int): + self.multiplier = multiplier + self._cache = cachebox.TTLCache(20, ttl=10) + + @cachebox.cached(lambda self: self._cache) + def compute(self, char: str): + return char * self.multiplier + +svc = MyService(5) +assert svc.compute("a") == "aaaaa" +``` + +## Using the Cache Implementations +You can use all cache implementations without the `@cached` decorator. +You only need to import the classes you want, and you can work with them like regular dictionaries (except for [`VTTLCache`](cachebox.VTTLCache), which has some differences). + +```python +from cachebox import FIFOCache + +cache = FIFOCache(maxsize=128) +cache["key"] = "value" +assert cache["key"] == "value" +assert cache.get("missing", "default") == "default" +``` + +## Immutable (Frozen) Cache + +Wrap any cache with `Frozen` to prevent further writes: + +```python +from cachebox import Frozen, LRUCache + +cache = LRUCache(10, {1: "a", 2: "b"}) +frozen = Frozen(cache, ignore=False) + +frozen[3] = "c" # TypeError: This cache is frozen. +``` + +## Saving a Cache to Disk + +Use Python's `pickle` module: + +```python +import cachebox, pickle + +cache = cachebox.LRUCache(100, {i: i for i in range(50)}) + +with open("cache.pkl", "wb") as f: + pickle.dump(cache, f) + +with open("cache.pkl", "rb") as f: + loaded = pickle.load(f) + +assert cache == loaded +``` + +## Next Steps + +- Browse the full [API Reference](api/index.md) for every class and method. +- Check [Tips & Notes](tips.md) for copying caches and advanced patterns. +- Read the [Migration Guide](migration.md) if upgrading from an earlier major version. 
diff --git a/docs/docs/index.md b/docs/docs/index.md new file mode 100644 index 0000000..6f0840a --- /dev/null +++ b/docs/docs/index.md @@ -0,0 +1,76 @@ +--- +title: Cachebox +description: The fastest caching Python library written in Rust +--- + +
+

Cachebox

+ The fastest caching Python library written in Rust +
+ +--- + +Cachebox lets you perform powerful caching operations in Python as fast as possible. +It can make your application significantly faster and is an excellent choice for complex, +high-scale applications. + +## Key Features + +
+ +- :rocket: **Extremely Fast** + + 10–50x faster than other caching libraries - [see benchmarks](https://github.com/awolverp/cachebox-benchmark). + +- :bar_chart: **Low Memory Usage** + + Only ~50% of the memory consumed by a standard Python dictionary. + +- :thread: **Thread-Safe** + + All cache operations are fully thread-safe via internal locking. + +- :package: **Zero Dependencies** + + Written entirely in Rust - no Python dependencies to install. + +- :fire: **Full-Featured** + + 7 caching algorithms, TTL support, decorators, callbacks, and more. + +- :handshake: **Compatible** + + Works with Python 3.10+ on both CPython and PyPy. + +
+ +## When Should I Use Caching? +- **Frequent Data Access**: If you need to access the same data multiple times, caching can help reduce the number of database queries or API calls, improving performance. + +- **Expensive Operations**: If you have operations that are computationally expensive, caching can help reduce the number of times these operations need to be performed. + +- **High Traffic Scenarios**: If your application handles high traffic, caching can help reduce the load on your server by reducing the number of requests that need to be processed. + +- **Web Page Rendering**: If you are rendering web pages, caching can help reduce the time it takes to generate the page by caching the results of expensive rendering operations. Caching HTML pages can speed up the delivery of static content. + +- **Rate Limiting**: If you have a rate limiting system in place, caching can help reduce the number of requests that need to be processed by the rate limiter. Also, caching can help you to manage rate limits imposed by third-party APIs by reducing the number of requests sent. + +- **Machine Learning Models**: If your application frequently makes predictions using the same input data, caching the results can save computation time. + + +## Quick Example + +```python +import cachebox + +@cachebox.cached(cachebox.LRUCache(maxsize=128)) +def get_user(user_id: int) -> dict: + # Expensive DB call - cached after first call + return db.query("SELECT * FROM users WHERE id = ?", user_id) + +# First call hits the database +user = get_user(42) + +# Subsequent calls are served from cache instantly +user = get_user(42) +``` diff --git a/docs/docs/installation.md b/docs/docs/installation.md new file mode 100644 index 0000000..313ec94 --- /dev/null +++ b/docs/docs/installation.md @@ -0,0 +1,32 @@ +**cachebox** is available on PyPI. You can use *pip* or *uv* to install cachebox. 
+You can install cachebox using **pip** or **uv**: + +=== "Using pip" + + ```console + $ pip install -U cachebox + ``` + +=== "Using uv" + + ```console + $ uv add cachebox + ``` + +That's it - cachebox has **zero Python dependencies**. The Rust extension is distributed as a +pre-built wheel for all major platforms and Python versions. + +!!! tip "Use Virtual Environments" + + It's recommended to use virtual environments for installing and managing libraries in Python. + +!!! warning "Upgrading from v5 to v6" + Version 6 introduces several breaking changes. Please review the + [Migration Guide](migration.md) before upgrading. + +## Verifying the Installation + +```python +import cachebox +print(cachebox.__version__) +``` diff --git a/docs/docs/migration.md b/docs/docs/migration.md new file mode 100644 index 0000000..cadd9e3 --- /dev/null +++ b/docs/docs/migration.md @@ -0,0 +1,150 @@ +# Migration Guide + +This page documents breaking changes between major versions. + +## v5 → v6 +These are changes that are not compatible with the previous version: + + + +### `copy_level` parameter has been removed from `@cached` +We removed the `copy_level` parameter from the `@cached` decorator. +The new `postprocess` feature gives you more control over results. + +```python +# v5 +@cachebox.cached(cachebox.RRCache(10), copy_level=2) +def add(a: int, b: int) -> dict: + return {a: b} + +# v6 +@cachebox.cached(cachebox.RRCache(10), postprocess=cachebox.postprocess_copy) +def add(a: int, b: int) -> dict: + return {a: b} +``` + +### `TTLCache.ttl` has been renamed to `TTLCache.global_ttl` +`TTLCache.ttl` has been renamed to `TTLCache.global_ttl` because it was causing developers to confuse the usage of +`TTLCache.ttl` with `VTTLCache`'s `ttl` parameter. 
+ +```python +# v5 +cache = cachebox.TTLCache(maxsize=125, ttl=10) +print(cache.ttl) + +# v6 +cache = cachebox.TTLCache(maxsize=125, global_ttl=10) +print(cache.global_ttl) +``` + +### Maxmemory limit has been removed +In version 5, we could limit the cache classes by memory using the `maxmemory` parameter. +But it reduced performance by about 75%, which goes against the library's goals; our focus is on performance & speed. +So we removed it and added a new parameter, `getsizeof`: a callable that computes the size of a key-value pair. +Now you can use this to implement weighted caching - for example, sizing entries by memory footprint or byte length. +This can cover the `maxmemory` use case while keeping performance high. + +```python +# v5 +cache = cachebox.LRUCache(maxsize=125, maxmemory=1000) + +# v6 +import sys + +def getsizeof(key, val): + return sys.getsizeof(key) + sys.getsizeof(val) + +cache = cachebox.LRUCache(maxsize=1000, getsizeof=getsizeof) +``` + +Due to this breaking change, we also removed the `memory` property from cache classes, and +added new methods: `current_size` and `remaining_size`. + +```python +# v5 +print(cache.memory) + +# v6 +print(cache.current_size()) +print(cache.remaining_size()) +``` + +### `CacheInfo` fields have changed +The `cachebox.utils.CacheInfo` namedtuple fields have breaking changes: +- `memory` field removed. +- `length` renamed to `size`. + +```python +info = cached_function.cache_info() + +# v5 +print(info.length) +print(info.memory) + +# v6 +print(info.size) +print(info.memory) # AttributeError +``` + +## v4 → v5 +These are changes that are not compatible with the previous version: + +### `CacheInfo.cachememory` renamed to `CacheInfo.memory` + +```python +info = func.cache_info() + +# v4 +print(info.cachememory) + +# v5 +print(info.memory) +``` + +### `__eq__` errors are no longer silently swallowed + +In v4, errors raised inside a custom `__eq__` method were caught and converted to a `KeyError`. +In v5, they propagate normally. 
+ +```python +class A: + def __hash__(self): return 1 + def __eq__(self, other): raise NotImplementedError + +cache = cachebox.FIFOCache(0, {A(): 10}) + +# v4: raises KeyError +# v5: raises NotImplementedError +cache[A()] +``` + +### Cache comparisons are no longer order-dependent + +In v4, two caches with the same keys/values in different insertion order were considered unequal. +In v5, cache equality follows standard dictionary semantics. + +```python +c1 = cachebox.FIFOCache(10) +c2 = cachebox.FIFOCache(10) + +c1.insert(1, 'a'); c1.insert(2, 'b') +c2.insert(2, 'b'); c2.insert(1, 'a') + +# v4: False (order-dependent) +# v5: True (dict-like) +print(c1 == c2) +``` + +### `cachedmethod` deprecated + +`cachedmethod` is deprecated since v5.1.0. Use `cached` with a `lambda self:` cache accessor: + +```python +# Before (v4) +@cachebox.cachedmethod(cachebox.TTLCache(0, ttl=10)) +def my_method(self, name: str): ... + +# After (v5.1.0+) +@cachebox.cached(lambda self: self._cache) +def my_method(self, name: str): ... 
+``` diff --git a/docs/docs/tips.md b/docs/docs/tips.md new file mode 100644 index 0000000..adb6902 --- /dev/null +++ b/docs/docs/tips.md @@ -0,0 +1,180 @@ +# Tips & Notes + +## Saving a Cache to a File + +Cachebox does not include built-in persistence, but all cache classes support Python's +`pickle` module: + +```python +import cachebox, pickle + +cache = cachebox.LRUCache(100, {i: i for i in range(78)}) + +# Save +with open("cache.pkl", "wb") as f: + pickle.dump(cache, f) + +# Load +with open("cache.pkl", "rb") as f: + loaded = pickle.load(f) + +assert cache == loaded +assert cache.capacity() == loaded.capacity() +``` + +## Copying a Cache +All cache classes support Python's `copy` module, both shallow-copy and deep-copy: + +```python +import cachebox +import copy + +cache = cachebox.LRUCache(100, {i: i for i in range(10)}) + +shallow = copy.copy(cache) # shallow copy +deep = copy.deepcopy(cache) # deep copy +``` + +## Avoiding Cache Stampede + +Cachebox uses a distributed lock system internally to prevent the +[cache stampede](https://en.wikipedia.org/wiki/Cache_stampede) problem — +multiple concurrent requests recomputing the same missing entry simultaneously. +No additional configuration is required. + +## Pre-allocating Capacity +If you know roughly how many items a cache will hold, set `capacity` to avoid +hash table rehashing during initial population: + +```python +cache = cachebox.LRUCache(maxsize=10_000, capacity=10_000) +``` + +## Thread Safety + +All cache operations (reads, writes, eviction) are protected by internal Rust mutexes. +You do **not** need to add external synchronisation. + +## TTL and Frozen Caches + +!!! note + + `Frozen` cannot prevent TTL expiration in `TTLCache` or `VTTLCache`. + Items will still expire naturally even when the cache is frozen. 
+ + ```python + from cachebox import Frozen, TTLCache + import time + + cache = TTLCache(0, ttl=1, iterable={1: "a"}) + frozen = Frozen(cache) + time.sleep(1) + print(len(frozen)) # 0 — expired despite being frozen + ``` + +## Attached attributes to cached functions +When you use the `@cached` decorator, If *cache* isn't a lambda/function, these attributes will be attached to +your function: + +=== "`cache` (property)" + + The cache class we're using for caching results. + + ```python hl_lines="9" + import cachebox + + @cachebox.cached( + cachebox.LFUCache(maxsize=20), + ) + def add(a: int, b: int) -> int: + return a + b + + assert type(add.cache) is cachebox.LFUCache + ``` + + !!! tip + You can use [get_cached_cache function](cachebox.utils.get_cached_cache) to prevent lint + & IDE warnings. + + ```python + assert type(cachebox.get_cached_cache(add)) is cachebox.LFUCache + ``` + +=== "`cache_info` (callable)" + + By calling it, you will get a basic statistics. + + ```python hl_lines="9" + import cachebox + + @cachebox.cached( + cachebox.LFUCache(maxsize=20), + ) + def add(a: int, b: int) -> int: + return a + b + + cache_info = add.cache_info() # CacheInfo(hits=0, misses=0, maxsize=20, size=0) + ``` + + !!! tip + You can use [get_cached_cache_info function](cachebox.utils.get_cached_cache_info) to prevent lint + & IDE warnings. + + ```python + cache_info = cachebox.get_cached_cache_info(add) # CacheInfo(hits=0, misses=0, maxsize=20, size=0) + ``` + +=== "`cache_clear` (callable)" + + Call it if you want to clear cache and reset statistics. + + ```python hl_lines="9" + import cachebox + + @cachebox.cached( + cachebox.LFUCache(maxsize=20), + ) + def add(a: int, b: int) -> int: + return a + b + + add.cache_clear() + ``` + + !!! tip + You can use [clear_cached_cache function](cachebox.utils.clear_cached_cache) to prevent lint + & IDE warnings. + + ```python + cachebox.clear_cached_cache(add) + ``` + +=== "`callback` (property)" + + The configured `callback`. 
+ + ```python hl_lines="12" + import cachebox + + def callback(event, key, value): ... + + @cachebox.cached( + cachebox.LFUCache(maxsize=20), + callback=callback, + ) + def add(a: int, b: int) -> int: + return a + b + + assert add.callback is callback + ``` + + !!! tip + You can use [get_cached_callback function](cachebox.utils.get_cached_callback) to prevent lint + & IDE warnings. + + ```python + assert cachebox.get_cached_callback(add) is callback + ``` + + +## TTLCache/VTTLCache background thread +TODO diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml new file mode 100644 index 0000000..bd0798e --- /dev/null +++ b/docs/mkdocs.yml @@ -0,0 +1,96 @@ +site_name: Cachebox +site_description: The fastest caching Python library written in Rust +site_url: https://awolverp.github.io/cachebox +repo_url: https://github.com/awolverp/cachebox +repo_name: awolverp/cachebox +edit_uri: edit/main/docs/ + +theme: + name: material + palette: + - scheme: default + primary: deep orange + accent: orange + toggle: + icon: material/brightness-7 + name: Switch to dark mode + - scheme: slate + primary: deep orange + accent: orange + toggle: + icon: material/brightness-4 + name: Switch to light mode + + features: + - table + - navigation.instant + - navigation.instant.progress + - navigation.tabs + - navigation.prune + - toc.integrate + - search + - search.suggest + - search.share + - projects + - optimize + - content.code.copy + - content.code.select + - content.code.annotate + - navigation.footer + + icon: + repo: fontawesome/brands/github + +plugins: + - search + - mkdocstrings: + handlers: + python: + paths: [cachebox] + options: + docstring_style: google + docstring_section_style: list + signature_crossrefs: true + inherited_members: true + parameter_headings: true + type_parameter_headings: true + show_root_heading: true + show_root_full_path: false + show_symbol_type_heading: true + show_symbol_type_toc: true + merge_init_into_class: true + show_signature_annotations: true + 
show_signature_type_parameters: true + show_bases: false + +markdown_extensions: + - admonition + - pymdownx.details + - pymdownx.superfences + - pymdownx.highlight: + anchor_linenums: true + line_spans: __span + pygments_lang_class: true + - pymdownx.inlinehilite + - pymdownx.tabbed: + alternate_style: true + - pymdownx.emoji: + emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:material.extensions.emoji.to_svg + - tables + - attr_list + - md_in_html + - toc: + permalink: true + +nav: + - Home: index.md + - Installation: installation.md + - Getting Started: getting-started.md + - API Reference: + - Overview: api/index.md + - Implementations: api/impls.md + - Utilities: api/utils.md + + - Migration Guide: migration.md + - Tips & Notes: tips.md diff --git a/requirements-dev.txt b/requirements-dev.txt index da5f4df..0725ddd 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -3,3 +3,5 @@ pytest hypothesis pytest-benchmark pytest-asyncio +mkdocs-material +mkdocstrings[python] diff --git a/src/pyclasses/cache.rs b/src/pyclasses/cache.rs index e1e3c43..9a37943 100644 --- a/src/pyclasses/cache.rs +++ b/src/pyclasses/cache.rs @@ -9,41 +9,42 @@ use crate::policies::wrapped::Wrapped; implement_pyclass! { /// A thread-safe, memory-efficient key-value cache with no eviction policy. - /// items remain in the cache until manually removed or the cache is cleared. /// - /// ## How It Works - /// `Cache` is essentially a configurable hashmap-like store. When an item is inserted: - /// - It is stored directly without any ordering, priority tracking, or access metadata. - /// - If a maximum size is configured, insertions beyond that limit are rejected (raises OverflowError). - /// - All read and write operations are thread-safe, making it safe for concurrent access without - /// external locking. + /// Items remain in the cache until manually removed or the cache is cleared. 
/// - /// Because no eviction logic runs in the background, there is no overhead from tracking usage order, - /// frequency counters, or expiry timestamps. + /// ``Cache`` is essentially a configurable hashmap-like store. When an item is + /// inserted, it is stored directly without any ordering, priority tracking, or + /// access metadata. If a maximum size is configured, insertions beyond that + /// limit are rejected with an ``OverflowError``. All read and write operations + /// are thread-safe. /// - /// ### Pros - /// - Minimal overhead: no bookkeeping for eviction means lower CPU and memory usage per entry compared - /// to policy-based caches. - /// - Predictable behavior: items are never silently removed, so cache hits are deterministic once an - /// item is stored. - /// - Thread-safe: safe for concurrent reads and writes out of the box. - /// - Configurable capacity: a hard size limit prevents unbounded memory growth. + /// Because no eviction logic runs in the background, there is no overhead from + /// tracking usage order, frequency counters, or expiry timestamps. /// - /// ### Cons - /// - No automatic eviction: the cache can fill up and stop accepting new entries if a max size is set, - /// requiring manual management. - /// - Unordered: unlike a standard dict (Python 3.7+), insertion order is not preserved. - /// - Not suitable for volatile data: stale entries persist forever unless explicitly invalidated. + /// Pros: + /// - Minimal overhead: no bookkeeping for eviction means lower CPU and + /// memory usage per entry compared to policy-based caches. + /// - Predictable behavior: items are never silently removed, so cache hits + /// are deterministic once an item is stored. + /// - Thread-safe: safe for concurrent reads and writes out of the box. + /// - Configurable capacity: a hard size limit prevents unbounded memory + /// growth. 
/// - /// ## When to Use It - /// `Cache` is the right choice when: - /// - You have a fixed, well-known set of keys that are expensive to compute and never go stale - /// (e.g., parsed config values, compiled regex patterns, loaded templates). - /// - The cached data has no meaningful expiry - it's either always valid or always explicitly invalidated. - /// - You need the lowest possible overhead and can guarantee the cache won't grow uncontrollably. + /// Cons: + /// - No automatic eviction: the cache can fill up and stop accepting new + /// entries if a max size is set, requiring manual management. + /// - Unordered: unlike a standard ``dict`` (Python 3.7+), insertion order + /// is not preserved. + /// - Not suitable for volatile data: stale entries persist forever unless + /// explicitly invalidated. /// - /// Avoid it when cached data can become stale, when the working set is unpredictable in size, or when you need automatic - /// memory pressure relief. + /// Use ``Cache`` when you have a fixed, well-known set of keys that are + /// expensive to compute and never go stale (e.g. parsed config values, + /// compiled regex patterns, loaded templates), and when the lowest possible + /// overhead is required. + /// + /// Avoid it when cached data can become stale, when the working set is + /// unpredictable in size, or when automatic memory pressure relief is needed. 
[subclass, extends=crate::pyclasses::base::PyBaseCacheImpl, generic, frozen] PyCache as "Cache" (onceinit::OnceInit>); } From 2750212e4f69d7616e619385953cce19fd78e7c4 Mon Sep 17 00:00:00 2001 From: awolverp Date: Sun, 31 May 2026 17:03:14 +0330 Subject: [PATCH 45/60] Update docs --- cachebox/utils.py | 26 +++++++++++++------------- docs/docs/api/index.md | 4 ++++ docs/docs/api/utils.md | 21 +++++++++++++++++++++ 3 files changed, 38 insertions(+), 13 deletions(-) diff --git a/cachebox/utils.py b/cachebox/utils.py index 6d500a4..9065c6d 100644 --- a/cachebox/utils.py +++ b/cachebox/utils.py @@ -591,23 +591,23 @@ def cached( Args: cache: Cache instance, ``dict``, or callable ``(self) -> cache`` for - per-instance caches. ``None`` defaults to an unbounded - :class:`LRUCache`. + per-instance caches. ``None`` defaults to an unbounded + :class:`LRUCache`. key_maker: Converts ``(args, kwds)`` to a hashable key. Built-ins: - :func:`make_key` (default), :func:`make_hash_key`, - :func:`make_typed_key`. + :func:`make_key` (default), :func:`make_hash_key`, + :func:`make_typed_key`. clear_reuse: Pass ``reuse=True`` to ``cache.clear()`` when - :func:`cache_clear` is called. + :func:`cache_clear` is called. callback: Called as ``callback(event, key, value)`` on every hit/miss. - May be a coroutine in async contexts. + May be a coroutine in async contexts. postprocess: Optional ``(value) -> value`` transform applied before - returning a result to the caller. Ready-to-use options: + returning a result to the caller. Ready-to-use options: - * ``None`` - return the cached object as-is. - * :func:`postprocess_copy` - shallow-copy. - * :func:`postprocess_copy_mutables` - shallow-copy only `dict`, `list` and `set` (default). - * :func:`postprocess_deepcopy` - deep-copy. - * :func:`postprocess_deepcopy_mutables` - deep-copy only `dict`, `list` and `set`. + * ``None`` - return the cached object as-is. + * :func:`postprocess_copy` - shallow-copy. 
+ * :func:`postprocess_copy_mutables` - shallow-copy only `dict`, `list` and `set` (default). + * :func:`postprocess_deepcopy` - deep-copy. + * :func:`postprocess_deepcopy_mutables` - deep-copy only `dict`, `list` and `set`. Note: Pass ``cachebox__ignore=True`` at call-time to bypass the cache. @@ -703,7 +703,7 @@ def get_cached_callback(cached_func: object) -> _Callback | None: return cached_func.callback # type: ignore -def clear_cached_cache(cached_func: object) -> BaseCacheImpl: +def clear_cached_cache(cached_func: object) -> None: """ A way to call ``cached_func.cache_clear()``, without type-hint warnings. diff --git a/docs/docs/api/index.md b/docs/docs/api/index.md index e69de29..8b264bd 100644 --- a/docs/docs/api/index.md +++ b/docs/docs/api/index.md @@ -0,0 +1,4 @@ +You can see + +- Core API reference [here](./impls.md) +- Utilities API reference [here](./utils.md) diff --git a/docs/docs/api/utils.md b/docs/docs/api/utils.md index 8b13789..d2df650 100644 --- a/docs/docs/api/utils.md +++ b/docs/docs/api/utils.md @@ -1 +1,22 @@ +::: cachebox.utils.postprocess_copy_mutables +::: cachebox.utils.postprocess_copy +::: cachebox.utils.postprocess_deepcopy_mutables +::: cachebox.utils.postprocess_deepcopy + +::: cachebox.utils.make_key +::: cachebox.utils.make_hash_key +::: cachebox.utils.make_typed_key + +::: cachebox.utils.Frozen + +::: cachebox.utils.CacheInfo +::: cachebox.utils.EVENT_MISS +::: cachebox.utils.EVENT_HIT + +::: cachebox.utils.cached +::: cachebox.utils.is_cached +::: cachebox.utils.get_cached_cache +::: cachebox.utils.get_cached_cache_info +::: cachebox.utils.get_cached_callback +::: cachebox.utils.clear_cached_cache From 840e571f6044cfbffb97753bd90794fe6f9e92a5 Mon Sep 17 00:00:00 2001 From: awolverp Date: Mon, 1 Jun 2026 12:07:51 +0330 Subject: [PATCH 46/60] Update __sizeof__ methods, and complete docs --- cachebox/_cachebox.py | 47 ++++++++++++++ cachebox/_core.pyi | 117 +++++++++++++++++++++++++++++++++++ cachebox/utils.py | 37 ++++++++++- 
docs/docs/getting-started.md | 27 ++++---- docs/docs/migration.md | 46 ++++++-------- docs/docs/tips.md | 100 ++++++++++++++++++++++++------ src/pyclasses/cache.rs | 4 +- src/pyclasses/fifocache.rs | 4 +- src/pyclasses/lfucache.rs | 4 +- src/pyclasses/lrucache.rs | 4 +- src/pyclasses/rrcache.rs | 4 +- src/pyclasses/ttlcache.rs | 4 +- src/pyclasses/vttlcache.rs | 4 +- 13 files changed, 333 insertions(+), 69 deletions(-) diff --git a/cachebox/_cachebox.py b/cachebox/_cachebox.py index 13af0b4..c62b46c 100644 --- a/cachebox/_cachebox.py +++ b/cachebox/_cachebox.py @@ -55,6 +55,10 @@ class TTLCache(_CoreTTLCache[KT, VT]): ``sweep_interval`` is set, a background thread performs the sweep on that interval instead, reclaiming expired entries independent of method calls. + | | get | insert | delete | popitem | + | ------------ | ----- | ------- | ---------------- | ------- | + | Worst-case | O(1) | O(1) | O(min(i, n-i)) | O(n) - very rare | + Pros: - Insert, lookup, and evict are all O(1) amortized: the ``front_offset`` trick eliminates the O(n) index-shifting that a @@ -90,6 +94,23 @@ class TTLCache(_CoreTTLCache[KT, VT]): Avoid it when strong temporal locality makes LRU a better fit, when per-entry TTL granularity is required (consider ``VTTLCache`` instead), or when the system clock is unreliable or subject to adjustment. + + Example:: + + from cachebox import TTLCache + import time + + cache = TTLCache(0, global_ttl=2) + cache.update({i:str(i) for i in range(10)}) + + print(cache.get_with_expire(2)) # ('2', 1.99) + + # Returns the oldest key in cache; this is the one which will be removed by `popitem()` + print(cache.first()) # 0 + + cache["mykey"] = "value" + time.sleep(2) + cache["mykey"] # KeyError """ def __init__( @@ -204,6 +225,10 @@ class VTTLCache(_CoreVTTLCache[KT, VT]): no TTL are the last resort and are evicted only when all expiring items have been exhausted.
+ | | get | insert | delete(i) | popitem | + | ------------ | ----- | ------- | -------------- | ------- | + | Worst-case | O(1)~ | O(1)~ | O(min(i, n-i)) | O(1)~ | + Pros: - Per-item TTL control: each entry can have a different lifetime. - Expired items are reclaimed before live items, maximising useful @@ -237,6 +262,28 @@ class VTTLCache(_CoreVTTLCache[KT, VT]): when strict and immediate expiry is a hard requirement, or when memory pressure from temporarily lingering stale entries is unacceptable and a background thread is not an option. + + Example:: + + from cachebox import VTTLCache + import time + + cache = VTTLCache(100, iterable={i:i for i in range(4)}, ttl=3) + print(len(cache)) # 4 + time.sleep(3) + print(len(cache)) # 0 + + # "key1" exists for 5 seconds + cache.insert("key1", "value", ttl=5) + # "key2" exists for 2 seconds + cache.insert("key2", "value", ttl=2) + + time.sleep(2) + # "key1" still exists for 3 more seconds + print(cache.get("key1")) # value + + # "key2" has expired + print(cache.get("key2")) # None """ def __init__( diff --git a/cachebox/_core.pyi b/cachebox/_core.pyi index d5d4d3d..d27b9cf 100644 --- a/cachebox/_core.pyi +++ b/cachebox/_core.pyi @@ -239,6 +239,10 @@ class Cache(BaseCacheImpl[KT, VT]): Because no eviction logic runs in the background, there is no overhead from tracking usage order, frequency counters, or expiry timestamps. + | | get | insert | delete | popitem | + | ------------ | ----- | ------- | ------ | ------- | + | Worst-case | O(1) | O(1) | O(1) | N/A | + Pros: - Minimal overhead: no bookkeeping for eviction means lower CPU and memory usage per entry compared to policy-based caches. @@ -263,8 +267,38 @@ class Cache(BaseCacheImpl[KT, VT]): Avoid it when cached data can become stale, when the working set is unpredictable in size, or when automatic memory pressure relief is needed.
+ + Example:: + + from cachebox import Cache + + cache = Cache(maxsize=100, iterable=None, capacity=100) + + # behaves like a regular dict + cache["key"] = "value" + # using `.insert(key, value)` is recommended + cache.insert("key", "value") + + print(cache["key"]) # value + + del cache["key"] + cache["key"] # KeyError: key + + # cachebox.Cache does not have any policy, so will raise OverflowError if the capacity is exceeded + cache.update({i:i for i in range(200)}) + # OverflowError: The cache has reached the bound. """ + # | Class | get | insert | delete | popitem | + # |---|---|---|---|---| + # | \`Cache\` | O(1) | O(1) | O(1) | N/A | + # | \`FIFOCache\` | O(1) | O(1) | O(min(i, n-i)) | O(1) | + # | \`RRCache\` | O(1) | O(1) | O(1) | O(1) | + # | \`LRUCache\` | O(1)~ | O(1)~ | O(1)~ | O(1)~ | + # | \`LFUCache\` | O(1)~ | O(1)~ | O(min(i, n-i)) | O(1)~ | + # | \`TTLCache\` | O(1)~ | O(1)~ | O(min(i, n-i)) | O(n) | + # | \`VTTLCache\` | O(1)~ | O(1)~ | O(min(i, n-i)) | O(1)~ | + def insert(self, key: KT, value: VT) -> typing.Optional[VT]: """ Inserts a key-value pair and returns the previous value if present. @@ -399,6 +433,10 @@ class FIFOCache(BaseCacheImpl[KT, VT]): A ``front_offset`` counter recovers physical positions at read time as ``entries[table[key] - front_offset]``. + | | get | insert | delete | popitem | + | ------------ | ----- | ------- | ---------------- | ------- | + | Worst-case | O(1) | O(1) | O(min(i, n-i)) | O(n) - very rare | + Pros: - Insert, lookup, and evict are all O(1) amortized. - Eviction order is fully deterministic and easy to reason about. @@ -418,6 +456,24 @@ class FIFOCache(BaseCacheImpl[KT, VT]): Avoid it when the workload has strong temporal locality; in those cases LRU or LFU will deliver meaningfully better hit rates.
+ + Example:: + + from cachebox import FIFOCache + + cache = FIFOCache(5, {i:i*2 for i in range(5)}) + + print(len(cache)) # 5 + cache["new-key"] = "new-value" + print(len(cache)) # 5 + + print(cache.get(3, "default-val")) # 6 + print(cache.get(6, "default-val")) # default-val + + print(cache.popitem()) # (1, 2) + + # Returns the first key in cache; this is the one which will be removed by `popitem()`. + print(cache.first()) """ def insert(self, key: KT, value: VT) -> typing.Optional[VT]: @@ -554,6 +610,10 @@ class RRCache(BaseCacheImpl[KT, VT]): ensuring fair treatment across all cached items regardless of access patterns. + | | get | insert | delete | popitem(i) | + | ------------ | ----- | ------- | ------ | -------------- | + | Worst-case | O(1) | O(1) | O(1) | O(min(i, n-i)) | + Pros: - Low overhead: computationally cheap compared to tracking access order or frequency. @@ -575,6 +635,17 @@ class RRCache(BaseCacheImpl[KT, VT]): Avoid it when access patterns are highly skewed, cache hits are mission-critical, or fine-grained eviction control is required. + + Example:: + + from cachebox import RRCache + + cache = RRCache(10, {i:i for i in range(10)}) + print(cache.is_full()) # True + print(cache.is_empty()) # False + + # Returns a random key + print(cache.random_key()) # 4 """ def insert(self, key: KT, value: VT) -> typing.Optional[VT]: @@ -718,6 +789,10 @@ class LRUCache(BaseCacheImpl[KT, VT]): O(1) lookups. On every access the item is moved to the back. On eviction the front item is removed. A running total enables O(1) capacity checks. + | | get | insert | delete(i) | popitem | + | ------------ | ----- | ------- | --------- | ------- | + | Worst-case | O(1)~ | O(1)~ | O(1)~ | O(1)~ | + Pros: - Excellent hit rates on temporal-locality workloads. - Insert, lookup, and evict are all O(1) amortized.
@@ -738,6 +813,21 @@ class LRUCache(BaseCacheImpl[KT, VT]): Avoid it for write-heavy workloads with few re-reads, ultra-low-latency requirements, or frequency-heavy bimodal access patterns (consider LFU instead). + + Example:: + + from cachebox import LRUCache + + cache = LRUCache(0, {i:i*2 for i in range(10)}) + + # access `0`, which moves it to the most-recently-used position + print(cache[0]) # 0 + print(cache.least_recently_used()) # 1 + print(cache.popitem()) # (1, 2) + + # .peek() searches for a key-value in the cache and returns it without moving the key to recently used. + print(cache.peek(2)) # 4 + print(cache.popitem()) # (2, 4) """ def insert(self, key: KT, value: VT) -> typing.Optional[VT]: @@ -912,6 +1002,10 @@ class LFUCache(BaseCacheImpl[KT, VT]): minimum-frequency item is popped in O(n log n) worst-case (amortised O(log n) under typical distributions). Lookups are O(1) via the hash map. + | | get | insert | delete(i) | popitem | + | ------------ | ----- | ------- | -------------- | ------- | + | Worst-case | O(1)~ | O(1)~ | O(min(i, n-i)) | O(1)~ | + Pros: - Frequency-aware eviction protects hot items under heavy cache pressure. @@ -934,6 +1028,29 @@ class LFUCache(BaseCacheImpl[KT, VT]): Avoid it when access patterns shift rapidly (use LRU instead) or when all keys are accessed with roughly equal probability.
+ + Example:: + + from cachebox import LFUCache + + cache = LFUCache(5) + cache.insert('first', 'A') + cache.insert('second', 'B') + + # access 'first' twice + cache['first'] + cache['first'] + + # access 'second' once + cache['second'] + + assert cache.least_frequently_used() == 'second' + assert cache.least_frequently_used(2) is None # 2 is out of range + + for item in cache.items_with_frequency(): + print(item) + # ('second', 'B', 1) + # ('first', 'A', 2) """ def insert(self, key: KT, value: VT) -> typing.Optional[VT]: diff --git a/cachebox/utils.py b/cachebox/utils.py index 9065c6d..73534de 100644 --- a/cachebox/utils.py +++ b/cachebox/utils.py @@ -117,6 +117,26 @@ class Frozen(BaseCacheImpl[KT, VT]): # pragma: no cover This class provides a read-only view of a cache, optionally allowing silent suppression of modification attempts instead of raising exceptions. + + Example:: + + from cachebox import Frozen, FIFOCache + + cache = FIFOCache(10, {1:1, 2:2, 3:3}) + + frozen = Frozen(cache, ignore=True) + print(frozen[1]) # 1 + print(len(frozen)) # 3 + + # Frozen ignores this action and does nothing + frozen.insert("key", "value") + print(len(frozen)) # 3 + + # Let's try with ignore=False + frozen = Frozen(cache, ignore=False) + + frozen.insert("key", "value") + # TypeError: This cache is frozen.
""" __slots__ = ("__cache", "ignore") @@ -370,7 +390,9 @@ async def __aexit__(self, *_) -> None: self._lock.release() -CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "size"]) +CacheInfo = namedtuple( + "CacheInfo", ("hits", "misses", "maxsize", "current_size", "length", "memory") +) EVENT_MISS = 1 EVENT_HIT = 2 @@ -564,6 +586,8 @@ async def _wrapped(*args, **kwds): misses, cache.maxsize, cache.current_size(), + len(cache), + cache.__sizeof__(), ) def cache_clear() -> None: @@ -584,6 +608,7 @@ def cached( key_maker: typing.Callable[..., typing.Hashable] = make_key, clear_reuse: bool = False, callback: _Callback | None = None, + copy_level: int = 1, postprocess: _PostProcess | None = postprocess_copy_mutables, ) -> typing.Callable[[FT], FT]: """ @@ -600,6 +625,8 @@ def cached( :func:`cache_clear` is called. callback: Called as ``callback(event, key, value)`` on every hit/miss. May be a coroutine in async contexts. + copy_level: It has been deprecated and no longer has any effect. Use + the postprocess parameter instead. postprocess: Optional ``(value) -> value`` transform applied before returning a result to the caller. Ready-to-use options: @@ -630,6 +657,14 @@ def __init__(self): def compute(self, n): return n * 2 """ + if copy_level != 1: + import warnings + + warnings.warn( + "`copy_level` parameter has been deprecated and no longer has any effect. Use the `postprocess` parameter instead", + category=DeprecationWarning, + ) + if cache is None: cache = LRUCache(0) elif type(cache) is dict: diff --git a/docs/docs/getting-started.md b/docs/docs/getting-started.md index ebdf123..881426e 100644 --- a/docs/docs/getting-started.md +++ b/docs/docs/getting-started.md @@ -56,7 +56,7 @@ You can use ready-to-use functions, or create a custom one. === "Standard way" - ```python hl_lines="5" + ```python hl_lines="3 4 8" import cachebox def path_key(request): @@ -85,15 +85,15 @@ You can use ready-to-use functions, or create a custom one. 
Ready to use key makers are: -- [make_key function](cachebox.utils.make_key) -- [make_typed_key function](cachebox.utils.make_typed_key) -- [make_hash_key function](cachebox.utils.make_hash_key) +- [make_key function](api/utils.md#cachebox.utils.make_key) +- [make_typed_key function](api/utils.md#cachebox.utils.make_typed_key) +- [make_hash_key function](api/utils.md#cachebox.utils.make_hash_key) ### Callbacks on Cache Events The `@cached` decorator supports callback on every hit/miss, using `callback` parameter. -```python hl_lines="11" +```python hl_lines="3 4 5 6 7 11" import cachebox def on_cache_event(event: int, key, value): @@ -123,9 +123,9 @@ The `@cached` decorator also supports postprocessors, using `postprocess` parame It can be used as a transformer which applied before returning a result to the caller. There are 3 ready-to-use key maker functions, and by default the `@cached` decorator uses -[`postprocess_copy_mutable` function](cachebox.utils.postprocess_copy_mutable). +[`postprocess_copy_mutables` function](api/utils.md#cachebox.utils.postprocess_copy_mutables). -```python hl_lines="9" +```python hl_lines="3 4 5 9" import cachebox def postprocess(result): @@ -144,10 +144,10 @@ add(1, 2) # RESULT: 3 Ready to use postprocessors: -- [postprocess_copy function](cachebox.utils.postprocess_copy) -- [postprocess_copy_mutable function](cachebox.utils.postprocess_copy_mutable) -- [postprocess_deepcopy function](cachebox.utils.postprocess_deepcopy) -- [postprocess_deepcopy_mutable function](cachebox.utils.postprocess_deepcopy_mutable) +- [postprocess_copy function](api/utils.md#cachebox.utils.postprocess_copy) +- [postprocess_copy_mutables function](api/utils.md#cachebox.utils.postprocess_copy_mutables) +- [postprocess_deepcopy function](api/utils.md#cachebox.utils.postprocess_deepcopy) +- [postprocess_deepcopy_mutables function](api/utils.md#cachebox.utils.postprocess_deepcopy_mutables) !!! 
note @@ -180,7 +180,8 @@ assert svc.compute("a") == "aaaaa" ## Using a Cache Implemetations You can use all cache implementations without `@cached` method. -You only need to import the classes you want and can work with them like a regular dictionaries (except for [`VTTLCache`](cachebox.VTTLCache), this have some differences). +You only need to import the classes you want and can work with them like a regular dictionaries +(except for [`VTTLCache`](api/impls.md#cachebox._cachebox.VTTLCache), this have some differences). ```python from cachebox import FIFOCache @@ -226,4 +227,4 @@ assert cache == loaded - Browse the full [API Reference](api/index.md) for every class and method. - Check [Tips & Notes](tips.md) for copying caches and advanced patterns. -- Read the [Migration Guide](migration.md) if upgrading from v4. +- Read the [Migration Guide](migration.md) if upgrading from v5. diff --git a/docs/docs/migration.md b/docs/docs/migration.md index cadd9e3..9c2f610 100644 --- a/docs/docs/migration.md +++ b/docs/docs/migration.md @@ -5,11 +5,9 @@ This page documents breaking changes between major versions. ## v5 → v6 These are changes that are not compatible with the previous version: - - -### `copy_level` parameter has removed from `@cached` -We removed `copy_level` parameter from `@cached` decorator. -The new `postprocess` feature gives you more control on results. +### `copy_level` parameter has been deprecated in `@cached` +The `copy_level` parameter has been marked as deprecated and no longer has any effect. +The new `postprocess` feature gives you more control over results. 
```python # v5 @@ -23,8 +21,8 @@ def add(a: int, b: int) -> dict: return {a: b} ``` -### `TTLCache.ttl` has renamed to `TTLCache.global_ttl` -`TTLCache.ttl` has renamed to `TTLCache.global_ttl` because it was causing developers to confuse the usage of +### `TTLCache.ttl` has been renamed to `TTLCache.global_ttl` +`TTLCache.ttl` has been renamed to `TTLCache.global_ttl` because it was causing developers to confuse the usage of `TTLCache.ttl` with `VTTLCache`'s `ttl` parameter. ```python @@ -37,12 +35,12 @@ cache = cachebox.TTLCache(maxsize=125, global_ttl=10) print(cache.global_ttl) ``` -### Maxmemory limit has removed -In version 5, we could limit the cache classes by memory using `maxmemory` parameter. -But it caused performance `-75%`, and that was not the library targets. Our focus is on performance & speed. +### Maxmemory limit has been removed +In version 5, we could limit the cache classes by memory using the `maxmemory` parameter. +But it caused a -75% performance regression, and that was not the library's target. Our focus is on performance & speed. So we removed it, but added a new parameter: `getsizeof`. A callable that computes the size of a key-value pair. Now you can use this to implement weighted caching - for example, sizing entries by memory footprint or byte length. -This could cover `maxmemory`, while keeps performance on top. +This could cover `maxmemory`, while keeping performance on top. ```python # v5 @@ -57,7 +55,7 @@ def getsizeof(key, val): cache = cachebox.LRUCache(maxsize=1000, getsizeof=getsizeof) ``` -Due to this breaking change, we also removed `memory` property from cache classes, and +Due to this breaking change, we also removed the `memory` property from cache classes, and added new methods: `current_size` and `remaining_size`. 
```python @@ -69,28 +67,23 @@ print(cache.current_size()) print(cache.remaining_size()) ``` -### `CacheInfo` fields have changed -The `cachebox.utils.CacheInfo` namedtuple fields has breaking changes: -- `memory` field removed. -- `length` renamed to `size`. +### `cachedmethod` has been removed +`cachedmethod` was deprecated in v5.1.0 and has been fully removed in v6. Use `cached` with a `lambda self:` cache accessor instead: ```python -info = cached_function.cache_info() - # v5 -print(info.length) -print(info.memory) +@cachebox.cachedmethod(cachebox.TTLCache(0, ttl=10)) +def my_method(self, name: str): ... # v6 -print(info.size) -print(info.memory) # AttributeError +@cachebox.cached(lambda self: self._cache) +def my_method(self, name: str): ... ``` ## v4 → v5 These are changes that are not compatible with the previous version: -### `CacheInfo.cachememory` renamed to `CacheInfo.memory` - +### `CacheInfo.cachememory` has been renamed to `CacheInfo.memory` ```python info = func.cache_info() @@ -102,7 +95,6 @@ print(info.memory) ``` ### `__eq__` errors are no longer silently swallowed - In v4, errors raised inside a custom `__eq__` method were caught and converted to a `KeyError`. In v5, they propagate normally. @@ -119,8 +111,7 @@ cache[A()] ``` ### Cache comparisons are no longer order-dependent - -In v4, two caches with the same keys/values in different insertion order were considered unequal. +In v4, two caches with the same keys/values in a different insertion order were considered unequal. In v5, cache equality follows standard dictionary semantics. ```python @@ -136,7 +127,6 @@ print(c1 == c2) ``` ### `cachedmethod` deprecated - `cachedmethod` is deprecated since v5.1.0. 
Use `cached` with a `lambda self:` cache accessor: ```python diff --git a/docs/docs/tips.md b/docs/docs/tips.md index adb6902..2cdc252 100644 --- a/docs/docs/tips.md +++ b/docs/docs/tips.md @@ -36,7 +36,6 @@ deep = copy.deepcopy(cache) # deep copy ``` ## Avoiding Cache Stampede - Cachebox uses a distributed lock system internally to prevent the [cache stampede](https://en.wikipedia.org/wiki/Cache_stampede) problem — multiple concurrent requests recomputing the same missing entry simultaneously. @@ -51,26 +50,22 @@ cache = cachebox.LRUCache(maxsize=10_000, capacity=10_000) ``` ## Thread Safety - All cache operations (reads, writes, eviction) are protected by internal Rust mutexes. You do **not** need to add external synchronisation. ## TTL and Frozen Caches +`Frozen` cannot prevent TTL expiration in `TTLCache` or `VTTLCache`. +Items will still expire naturally even when the cache is frozen. -!!! note - - `Frozen` cannot prevent TTL expiration in `TTLCache` or `VTTLCache`. - Items will still expire naturally even when the cache is frozen. - - ```python - from cachebox import Frozen, TTLCache - import time +```python +from cachebox import Frozen, TTLCache +import time - cache = TTLCache(0, ttl=1, iterable={1: "a"}) - frozen = Frozen(cache) - time.sleep(1) - print(len(frozen)) # 0 — expired despite being frozen - ``` +cache = TTLCache(0, global_ttl=1, iterable={1: "a"}) +frozen = Frozen(cache) +time.sleep(1) +print(len(frozen)) # 0 — expired despite being frozen +``` ## Attached attributes to cached functions When you use the `@cached` decorator, If *cache* isn't a lambda/function, these attributes will be attached to @@ -93,7 +88,7 @@ your function: ``` !!! tip - You can use [get_cached_cache function](cachebox.utils.get_cached_cache) to prevent lint + You can use [get_cached_cache function](api/utils.md#cachebox.utils.get_cached_cache) to prevent lint & IDE warnings. ```python @@ -117,7 +112,7 @@ your function: ``` !!!
tip - You can use [get_cached_cache_info function](cachebox.utils.get_cached_cache_info) to prevent lint + You can use [get_cached_cache_info function](api/utils.md#cachebox.utils.get_cached_cache_info) to prevent lint & IDE warnings. ```python @@ -141,7 +136,7 @@ your function: ``` !!! tip - You can use [clear_cached_cache function](cachebox.utils.clear_cached_cache) to prevent lint + You can use [clear_cached_cache function](api/utils.md#cachebox.utils.clear_cached_cache) to prevent lint & IDE warnings. ```python @@ -168,7 +163,7 @@ your function: ``` !!! tip - You can use [get_cached_callback function](cachebox.utils.get_cached_callback) to prevent lint + You can use [get_cached_callback function](api/utils.md#cachebox.utils.get_cached_callback) to prevent lint & IDE warnings. ```python @@ -177,4 +172,69 @@ your function: ## TTLCache/VTTLCache background thread -TODO +By default, both `TTLCache` and `VTTLCache` use **lazy expiry**: stale entries are +only cleaned up when the cache is interacted with (e.g. on insert, lookup, or +iteration). A completely idle cache will hold expired entries in memory until +the next interaction. + +To reclaim expired entries proactively — independent of any method calls — pass a +`sweep_interval` to start a background sweeper thread: + +```python +import cachebox +from datetime import timedelta + +# Sweep every 30 seconds +ttl_cache = cachebox.TTLCache(maxsize=1000, global_ttl=60, sweep_interval=30) + +# timedelta is also accepted +vttl_cache = cachebox.VTTLCache(maxsize=1000, sweep_interval=timedelta(seconds=30)) +``` + +The thread is a **daemon thread**, meaning it will not prevent the Python process +from exiting when the main thread finishes. + +!!! note + + `sweep_interval` must be **≥ 1 second**. Smaller values raise a `ValueError`: + + ```python + cachebox.TTLCache(100, global_ttl=5, sweep_interval=0.5) + # ValueError: sweep_interval must be more than 1 seconds. 
+ ``` + +```python +cache = cachebox.TTLCache(100, global_ttl=60, sweep_interval=30) +print(cache.sweep_interval) # 30.0 + +# Without a sweeper, sweep_interval is None +cache2 = cachebox.TTLCache(100, global_ttl=60) +print(cache2.sweep_interval) # None +``` + +Call `stop_sweeper()` when you want to halt background sweeping without +destroying the cache itself. This is useful when you need to pause periodic +eviction or cleanly shut down the thread before the cache goes out of scope: + +```python +cache = cachebox.TTLCache(100, global_ttl=60, sweep_interval=10) + +# ... later, during shutdown ... +cache.stop_sweeper() +``` + +!!! note + + The sweeper thread is also stopped automatically when the cache is garbage + collected (via `__del__`), so manual cleanup is only necessary when explicit + lifecycle control is required. + +Use a **sweeper** when: +- The cache may be idle for long periods but memory should still be reclaimed. +- You need to bound the window in which stale data could be observed (e.g. via `items()` or `__iter__`). +- You are using `VTTLCache` with short, heterogeneous TTLs and want predictable cleanup. + +Stick with **lazy expiry** when: +- The cache sees regular traffic and on-access cleanup is sufficient. +- You want to avoid any background thread overhead. +- Memory pressure from temporarily lingering stale entries is acceptable. 
diff --git a/src/pyclasses/cache.rs b/src/pyclasses/cache.rs index 9a37943..14be865 100644 --- a/src/pyclasses/cache.rs +++ b/src/pyclasses/cache.rs @@ -158,10 +158,12 @@ impl PyCache { #[inline] fn __sizeof__(&self) -> usize { + const FIXED_SIZE: usize = std::mem::size_of::>(); + let inner = self.0.get(); let policy = inner.policy(); - policy.table().capacity() * std::mem::size_of::() + FIXED_SIZE + (policy.table().capacity() * std::mem::size_of::()) } #[inline] diff --git a/src/pyclasses/fifocache.rs b/src/pyclasses/fifocache.rs index 7a586aa..08bf7a4 100644 --- a/src/pyclasses/fifocache.rs +++ b/src/pyclasses/fifocache.rs @@ -165,12 +165,14 @@ impl PyFIFOCache { #[inline] fn __sizeof__(&self) -> usize { + const FIXED_SIZE: usize = std::mem::size_of::>(); + let inner = self.0.get(); let policy = inner.policy(); let table_cap = policy.table().capacity() * std::mem::size_of::(); let vecdeque_cap = policy.entries().capacity() * std::mem::size_of::(); - table_cap + vecdeque_cap + FIXED_SIZE + table_cap + vecdeque_cap } #[inline] diff --git a/src/pyclasses/lfucache.rs b/src/pyclasses/lfucache.rs index fb7208b..bd6193d 100644 --- a/src/pyclasses/lfucache.rs +++ b/src/pyclasses/lfucache.rs @@ -181,13 +181,15 @@ impl PyLFUCache { #[inline] fn __sizeof__(&self) -> usize { + const FIXED_SIZE: usize = std::mem::size_of::>(); + let inner = self.0.get(); let policy = inner.policy(); let table_cap = policy.table().capacity() * 8; let list_cap = policy.heap().len() * std::mem::size_of::(); - table_cap + list_cap + FIXED_SIZE + table_cap + list_cap } #[inline] diff --git a/src/pyclasses/lrucache.rs b/src/pyclasses/lrucache.rs index eba106a..393468e 100644 --- a/src/pyclasses/lrucache.rs +++ b/src/pyclasses/lrucache.rs @@ -191,13 +191,15 @@ impl PyLRUCache { #[inline] fn __sizeof__(&self) -> usize { + const FIXED_SIZE: usize = std::mem::size_of::>(); + let inner = self.0.get(); let policy = inner.policy(); let table_cap = policy.table().capacity() * 8; let list_cap = 
policy.list().len() * std::mem::size_of::(); - table_cap + list_cap + FIXED_SIZE + table_cap + list_cap } #[inline] diff --git a/src/pyclasses/rrcache.rs b/src/pyclasses/rrcache.rs index 1b33bba..55326d8 100644 --- a/src/pyclasses/rrcache.rs +++ b/src/pyclasses/rrcache.rs @@ -162,10 +162,12 @@ impl PyRRCache { #[inline] fn __sizeof__(&self) -> usize { + const FIXED_SIZE: usize = std::mem::size_of::>(); + let inner = self.0.get(); let policy = inner.policy(); - policy.table().capacity() * std::mem::size_of::() + FIXED_SIZE + policy.table().capacity() * std::mem::size_of::() } #[inline] diff --git a/src/pyclasses/ttlcache.rs b/src/pyclasses/ttlcache.rs index e8d949c..deb5f19 100644 --- a/src/pyclasses/ttlcache.rs +++ b/src/pyclasses/ttlcache.rs @@ -157,6 +157,8 @@ impl PyTTLCache { #[inline] fn __sizeof__(&self) -> usize { + const FIXED_SIZE: usize = std::mem::size_of::>(); + let inner = self.0.get(); let policy = inner.policy(); @@ -164,7 +166,7 @@ impl PyTTLCache { let vecdeque_cap = policy.entries().capacity() * std::mem::size_of::(); - table_cap + vecdeque_cap + FIXED_SIZE + table_cap + vecdeque_cap } #[inline] diff --git a/src/pyclasses/vttlcache.rs b/src/pyclasses/vttlcache.rs index 84baf30..451199a 100644 --- a/src/pyclasses/vttlcache.rs +++ b/src/pyclasses/vttlcache.rs @@ -145,13 +145,15 @@ impl PyVTTLCache { #[inline] fn __sizeof__(&self) -> usize { + const FIXED_SIZE: usize = std::mem::size_of::>(); + let inner = self.0.get(); let policy = inner.policy(); let table_cap = policy.table().capacity() * 8; let list_cap = policy.heap().len() * std::mem::size_of::(); - table_cap + list_cap + FIXED_SIZE + table_cap + list_cap } #[inline] From 82e514250d11bbd994bafc50e0c2c1bbfdd0c43a Mon Sep 17 00:00:00 2001 From: awolverp Date: Mon, 1 Jun 2026 12:08:35 +0330 Subject: [PATCH 47/60] Update docs --- docs/docs/getting-started.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/docs/docs/getting-started.md b/docs/docs/getting-started.md index 881426e..527b003 100644 
--- a/docs/docs/getting-started.md +++ b/docs/docs/getting-started.md @@ -149,10 +149,6 @@ Ready to use postprocessors: - [postprocess_deepcopy function](api/utils.md#cachebox.utils.postprocess_deepcopy) - [postprocess_deepcopy_mutables function](api/utils.md#cachebox.utils.postprocess_deepcopy_mutables) -!!! note - - Added since v6.0.0, and the `copy_level` parameter removed due to this feature. - ### Bypass the Cache for a Single Call Pass `cachebox__ignore=True` to skip the cache entirely: From d74ebfa5ed7359e068509482417f11b341c78898 Mon Sep 17 00:00:00 2001 From: awolverp Date: Mon, 1 Jun 2026 12:11:07 +0330 Subject: [PATCH 48/60] Remove benchmarks from tests --- requirements-dev.txt | 1 - tests/benchmark.py | 81 -------------------------------------------- 2 files changed, 82 deletions(-) delete mode 100644 tests/benchmark.py diff --git a/requirements-dev.txt b/requirements-dev.txt index 0725ddd..5e89a3b 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,7 +1,6 @@ maturin pytest hypothesis -pytest-benchmark pytest-asyncio mkdocs-material mkdocstrings[python] diff --git a/tests/benchmark.py b/tests/benchmark.py deleted file mode 100644 index fa07e41..0000000 --- a/tests/benchmark.py +++ /dev/null @@ -1,81 +0,0 @@ -import typing - -import cachebox - -from . 
import mixins - - -class TestCache(mixins.BenchmarkMixin): - def create_cache( - self, - maxsize: int = 10, - iterable: typing.Any = None, - capacity: int = 0, - getsizeof: typing.Any = None, - ) -> cachebox.Cache: - return cachebox.Cache(maxsize, iterable, capacity=capacity, getsizeof=getsizeof) - - -class TestFIFOCache(mixins.BenchmarkMixin): - def create_cache( - self, - maxsize: int = 10, - iterable: typing.Any = None, - capacity: int = 0, - getsizeof: typing.Any = None, - ) -> cachebox.FIFOCache: - return cachebox.FIFOCache( - maxsize, iterable, capacity=capacity, getsizeof=getsizeof - ) - - -class TestRRCache(mixins.BenchmarkMixin): - def create_cache( - self, - maxsize: int = 10, - iterable: typing.Any = None, - capacity: int = 0, - getsizeof: typing.Any = None, - ) -> cachebox.RRCache: - return cachebox.RRCache( - maxsize, iterable, capacity=capacity, getsizeof=getsizeof - ) - - -class TestLRUCache(mixins.BenchmarkMixin): - def create_cache( - self, - maxsize: int = 10, - iterable: typing.Any = None, - capacity: int = 0, - getsizeof: typing.Any = None, - ) -> cachebox.LRUCache: - return cachebox.LRUCache( - maxsize, iterable, capacity=capacity, getsizeof=getsizeof - ) - - -class TestLFUCache(mixins.BenchmarkMixin): - def create_cache( - self, - maxsize: int = 10, - iterable: typing.Any = None, - capacity: int = 0, - getsizeof: typing.Any = None, - ) -> cachebox.LFUCache: - return cachebox.LFUCache( - maxsize, iterable, capacity=capacity, getsizeof=getsizeof - ) - - -class TestTTLCache(mixins.BenchmarkMixin): - def create_cache( - self, - maxsize: int = 10, - iterable: typing.Any = None, - capacity: int = 0, - getsizeof: typing.Any = None, - ) -> cachebox.TTLCache: - return cachebox.TTLCache( - maxsize, 10, iterable, capacity=capacity, getsizeof=getsizeof - ) From 95ce03d33e38d4a9a7af37e6bb291d6d632db802 Mon Sep 17 00:00:00 2001 From: awolverp Date: Mon, 1 Jun 2026 12:22:30 +0330 Subject: [PATCH 49/60] Fix tests and utils --- cachebox/utils.py | 9 ++++++++- 
tests/test_impls.py | 4 ++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/cachebox/utils.py b/cachebox/utils.py index 73534de..01f7139 100644 --- a/cachebox/utils.py +++ b/cachebox/utils.py @@ -484,7 +484,14 @@ def _wrapped(*args, **kwds): if not cache_is_fn: _wrapped.cache = cache # type: ignore[attr-defined] - _wrapped.cache_info = lambda: CacheInfo(hits, misses, cache.maxsize, len(cache)) # type: ignore[attr-defined] + _wrapped.cache_info = lambda: CacheInfo( # type: ignore[attr-defined] + hits, + misses, + cache.maxsize, + cache.current_size(), + len(cache), + cache.__sizeof__(), + ) def cache_clear() -> None: nonlocal hits, misses diff --git a/tests/test_impls.py b/tests/test_impls.py index 14c3a95..f266c3b 100644 --- a/tests/test_impls.py +++ b/tests/test_impls.py @@ -306,7 +306,7 @@ def test_clear_resets_fifo_order(self): assert cache.last() == 30 @pytest.mark.skipif( - cachebox._small_offset_feature, + not cachebox._small_offset_feature, reason="requires small-offset feature flag", ) def test_edge_case_of_front_offset_overflow(self): @@ -1160,7 +1160,7 @@ def test_clear_resets_fifo_order(self): assert cache.last() == 30 @pytest.mark.skipif( - cachebox._small_offset_feature, + not cachebox._small_offset_feature, reason="requires small-offset feature flag", ) def test_edge_case_of_front_offset_overflow(self): From 1701769bad74d6e4a40aedb4176d35a1ec7bba69 Mon Sep 17 00:00:00 2001 From: awolverp Date: Mon, 1 Jun 2026 12:24:31 +0330 Subject: [PATCH 50/60] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index a424826..3d46dd0 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ ------- > [!WARNING]\ -> The new version v6 has incompatibilities with v5. For more info see [Incompatible changes](#incompatible-changes). +> The new version v6 has incompatibilities with v5. For more info see [Migration Guide](https://awolverp.github.io/cachebox/migration). 
### What does it do? You can easily perform powerful caching operations in Python as fast as possible. @@ -24,7 +24,7 @@ This can make your application a lot faster and it can be a good choice in compl **Ideal for optimizing large-scale applications** with efficient, low-overhead caching. **Key Features:** -- 🚀 Extremely fast (10-50x faster than other caching libraries -- [*benchmarks*](https://github.com/awolverp/cachebox-benchmark)) +- 🚀 Extremely fast (10-50x faster than other caching libraries - [*benchmarks*](https://github.com/awolverp/cachebox-benchmark)) - 📊 Minimal memory footprint - 🔥 Full-featured and user-friendly - 🧶 Completely thread-safe From 647eb9f42189802e6d5d83a9b2a696037953fa6c Mon Sep 17 00:00:00 2001 From: awolverp Date: Mon, 1 Jun 2026 14:59:58 +0330 Subject: [PATCH 51/60] Update github workflows --- .github/workflows/CI.yml | 220 +++++++++++++----------------- .github/workflows/python-test.yml | 64 ++++----- 2 files changed, 130 insertions(+), 154 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 6c97180..03ad80d 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -1,259 +1,235 @@ -name: CI +name: Publish on: push: tags: - - '*' - + - "*" + workflow_dispatch: +concurrency: + group: ci-${{ github.ref }} + cancel-in-progress: ${{ !startsWith(github.ref, 'refs/tags/') }} + jobs: - linux: - runs-on: ${{ matrix.platform.runner }} + build-linux: + runs-on: ubuntu-latest strategy: matrix: platform: - { - runner: ubuntu-latest, target: x86_64, interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11", - maturin-args: "--features mimalloc", } - { - runner: ubuntu-latest, target: x86, interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11", - maturin-args: "--features mimalloc", } - { - runner: ubuntu-latest, target: aarch64, interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11", - maturin-args: "--features mimalloc --zig", } - { - runner: ubuntu-latest, target: armv7, interpreter: 
"3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11", - maturin-args: "", } - { - runner: ubuntu-latest, target: s390x, interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11", - maturin-args: "", } - { - runner: ubuntu-latest, target: ppc64le, interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11", - maturin-args: "", } + steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6.0.2 with: persist-credentials: false - - - uses: actions/setup-python@v6 + + - uses: actions/setup-python@v6.2.0 with: - python-version: 3.x - - - name: "Build wheels" - uses: PyO3/maturin-action@v1.49.4 + python-version: "3.x" + + - uses: PyO3/maturin-action@v1.51.0 with: target: ${{ matrix.platform.target }} - args: > - --release --out dist ${{ matrix.platform.maturin-args }} - --interpreter ${{ matrix.platform.interpreter }} + args: --release --out dist --interpreter ${{ matrix.platform.interpreter }} sccache: ${{ !startsWith(github.ref, 'refs/tags/') }} manylinux: auto rust-toolchain: nightly - - - name: "Upload wheels" - uses: actions/upload-artifact@v4.6.2 + + - uses: actions/upload-artifact@v7.0.1 with: name: wheels-linux-${{ matrix.platform.target }} path: dist - - musllinux: - runs-on: ${{ matrix.platform.runner }} + + build-musllinux: + runs-on: ubuntu-latest strategy: matrix: platform: - { - runner: ubuntu-latest, target: x86_64, - interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11" + interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11", } - { - runner: ubuntu-latest, target: x86, - interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11" + interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11", } - { - runner: ubuntu-latest, target: aarch64, - interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11" + interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11", } - { - runner: ubuntu-latest, target: armv7, - interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11" + interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11", 
} steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6.0.2 with: persist-credentials: false - - uses: actions/setup-python@v6 + + - uses: actions/setup-python@v6.2.0 with: - python-version: 3.x - - name: "Build wheels" - uses: PyO3/maturin-action@v1.49.4 + python-version: "3.x" + + - uses: PyO3/maturin-action@v1.51.0 with: target: ${{ matrix.platform.target }} - args: > - --release --out dist - --interpreter ${{ matrix.platform.interpreter }} - --features mimalloc + args: --release --out dist --interpreter ${{ matrix.platform.interpreter }} sccache: ${{ !startsWith(github.ref, 'refs/tags/') }} manylinux: musllinux_1_2 rust-toolchain: nightly - - - name: "Upload wheels" - uses: actions/upload-artifact@v4.6.2 + + - uses: actions/upload-artifact@v7.0.1 with: name: wheels-musllinux-${{ matrix.platform.target }} path: dist - - windows: - runs-on: ${{ matrix.platform.runner }} + + build-windows: + runs-on: windows-latest strategy: matrix: platform: - { - runner: windows-latest, target: x64, - interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11" - } - - { - runner: windows-latest, - target: x86, - interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t" + interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11", } + - { target: x86, interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t" } + steps: - - uses: actions/checkout@v5.0.0 + - uses: actions/checkout@v6.0.2 with: persist-credentials: false - - uses: actions/setup-python@v6.0.0 + + - uses: actions/setup-python@v6.2.0 with: - python-version: 3.x + python-version: "3.x" architecture: ${{ matrix.platform.target }} - - name: "Build wheels" - uses: PyO3/maturin-action@v1.49.4 + + - uses: PyO3/maturin-action@v1.51.0 with: target: ${{ matrix.platform.target }} - args: > - --release --out dist - --interpreter ${{ matrix.platform.interpreter }} - --features mimalloc + args: --release --out dist --interpreter ${{ matrix.platform.interpreter }} sccache: ${{ !startsWith(github.ref, 'refs/tags/') }} 
rust-toolchain: nightly - - name: "Upload wheels" - uses: actions/upload-artifact@v4.6.2 + - uses: actions/upload-artifact@v7.0.1 with: name: wheels-windows-${{ matrix.platform.target }} path: dist - macos: - runs-on: ${{ matrix.platform.runner }} + build-macos: + runs-on: macos-latest strategy: matrix: platform: - { - runner: macos-latest, - target: x86_64, - interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11" - } + target: x86_64, + interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11", + } - { - runner: macos-latest, - target: aarch64, - interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11" - } + target: aarch64, + interpreter: "3.10 3.11 3.12 3.13 3.13t 3.14 3.14t pypy3.11", + } + steps: - - uses: actions/checkout@v5.0.0 + - uses: actions/checkout@v6.0.2 with: persist-credentials: false - - - uses: actions/setup-python@v6.0.0 + + - uses: actions/setup-python@v6.2.0 with: - python-version: 3.x - - - name: "Build wheels" - uses: PyO3/maturin-action@v1.49.4 + python-version: "3.x" + + - uses: PyO3/maturin-action@v1.51.0 with: target: ${{ matrix.platform.target }} - args: > - --release --out dist - --interpreter ${{ matrix.platform.interpreter }} - --features mimalloc + args: --release --out dist --interpreter ${{ matrix.platform.interpreter }} sccache: ${{ !startsWith(github.ref, 'refs/tags/') }} rust-toolchain: nightly - - name: "Upload wheels" - uses: actions/upload-artifact@v4.6.2 + - uses: actions/upload-artifact@v7.0.1 with: name: wheels-macos-${{ matrix.platform.target }} path: dist build-sdist: - name: "build sdist" runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5.0.0 + - uses: actions/checkout@v6.0.2 with: persist-credentials: false - - - name: "Build sdist" - uses: PyO3/maturin-action@v1.49.4 + + - name: Build sdist + uses: PyO3/maturin-action@v1.51.0 with: command: sdist args: --out dist rust-toolchain: nightly - - - name: "Upload sdist" - uses: actions/upload-artifact@v4.6.2 + + - name: Upload sdist + uses: 
actions/upload-artifact@v7.0.1 with: name: wheels-sdist path: dist - + release: - name: "release" runs-on: ubuntu-latest - if: ${{ startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch' }} - needs: [linux, musllinux, windows, macos, build-sdist] + if: startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch' + needs: + - build-linux + - build-musllinux + - build-windows + - build-macos + - build-sdist + permissions: - # Use to sign the release artifacts id-token: write - # Used to upload release artifacts contents: write - # Used to generate artifact attestation attestations: write + steps: - - uses: actions/download-artifact@v5.0.0 - - name: "Generate artifact attestation" - uses: actions/attest-build-provenance@v3.0.0 + - uses: actions/download-artifact@v8.0.1 with: - subject-path: "wheels-*/*" - - - name: "Publish to PyPI" - if: ${{ startsWith(github.ref, 'refs/tags/') }} - uses: PyO3/maturin-action@v1.49.4 + pattern: wheels-* + merge-multiple: true + path: dist + + - name: Generate artifact attestation + uses: actions/attest-build-provenance@v4.1.0 + with: + subject-path: dist/* + + - name: Publish to PyPI + if: startsWith(github.ref, 'refs/tags/') + uses: PyO3/maturin-action@v1.51.0 env: MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }} - + with: command: upload - args: --non-interactive --skip-existing wheels-*/* + args: --non-interactive --skip-existing dist/* rust-toolchain: nightly diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index 9ea7d34..7c9a354 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -1,4 +1,4 @@ -name: python-test +name: Python Test on: push: @@ -8,10 +8,23 @@ on: permissions: contents: read +concurrency: + group: tests-${{ github.ref }} + cancel-in-progress: true + +env: + RUST_BACKTRACE: 1 + jobs: test-python: - if: ${{ contains(github.event.head_commit.message, '!test') || github.event_name == 'workflow_dispatch' 
|| github.event_name == 'pull_request' }} + if: >- + contains(github.event.head_commit.message, '!test') || + github.event_name == 'workflow_dispatch' || + github.event_name == 'pull_request' + name: test ${{ matrix.python-version }} + runs-on: ubuntu-latest + continue-on-error: ${{ endsWith(matrix.python-version, 't') }} strategy: fail-fast: false matrix: @@ -25,68 +38,55 @@ jobs: - "3.14t" - "pypy3.11" - runs-on: ubuntu-latest - continue-on-error: ${{ endsWith(matrix.python-version, 't') }} - steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6.0.2 - - name: install rust stable - uses: dtolnay/rust-toolchain@stable - with: - toolchain: nightly + - uses: dtolnay/rust-toolchain@nightly - - name: set up python - uses: actions/setup-python@v5 + - uses: actions/setup-python@v6.2.0 with: python-version: ${{ matrix.python-version }} allow-prereleases: true - - run: pip install pytest pytest-asyncio "maturin[patchelf]" + - run: pip install pytest pytest-asyncio hypothesis maturin - - run: pip install -e . 
- env: - RUST_BACKTRACE: 1 + - run: maturin develop --features small-offset - run: pip freeze - run: pytest -vv env: - RUST_BACKTRACE: 1 HYPOTHESIS_PROFILE: slow test-os: - if: ${{ contains(github.event.head_commit.message, '!test') || github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request' }} - name: test on ${{ matrix.os }} + if: >- + contains(github.event.head_commit.message, '!test') || + github.event_name == 'workflow_dispatch' || + github.event_name == 'pull_request' + name: test on ${{ matrix.os }} + runs-on: ${{ matrix.os }}-latest strategy: fail-fast: false matrix: os: [ubuntu, macos, windows] - runs-on: ${{ matrix.os }}-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6.0.2 - - name: install rust stable - uses: dtolnay/rust-toolchain@stable - with: - toolchain: nightly + - uses: dtolnay/rust-toolchain@nightly - - name: set up python - uses: actions/setup-python@v5 + - uses: actions/setup-python@v6.2.0 with: python-version: "3.13" + allow-prereleases: true - - run: pip install pytest pytest-asyncio maturin + - run: pip install pytest pytest-asyncio hypothesis maturin - - run: pip install -e . 
- env: - RUST_BACKTRACE: 1 + - run: maturin develop --features small-offset - run: pip freeze - run: pytest -vv env: - RUST_BACKTRACE: 1 HYPOTHESIS_PROFILE: slow From f26b7687680173c95905ce2290669f5528bb3824 Mon Sep 17 00:00:00 2001 From: awolverp Date: Mon, 1 Jun 2026 15:03:37 +0330 Subject: [PATCH 52/60] Update github workflows --- .github/workflows/python-test.yml | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index 7c9a354..2d90d9d 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -1,7 +1,6 @@ name: Python Test on: - push: pull_request: workflow_dispatch: @@ -48,13 +47,10 @@ jobs: python-version: ${{ matrix.python-version }} allow-prereleases: true - - run: pip install pytest pytest-asyncio hypothesis maturin - + - run: pip install virtualenv pytest pytest-asyncio hypothesis maturin + - run: virtualenv --no-vcs-ignore .venv - run: maturin develop --features small-offset - - - run: pip freeze - - - run: pytest -vv + - run: pytest -v env: HYPOTHESIS_PROFILE: slow @@ -81,12 +77,9 @@ jobs: python-version: "3.13" allow-prereleases: true - - run: pip install pytest pytest-asyncio hypothesis maturin - + - run: pip install virtualenv pytest pytest-asyncio hypothesis maturin + - run: virtualenv --no-vcs-ignore .venv - run: maturin develop --features small-offset - - - run: pip freeze - - - run: pytest -vv + - run: pytest -v env: HYPOTHESIS_PROFILE: slow From 79bbd3c55cd2a313b7d886a655f73cace489ca3b Mon Sep 17 00:00:00 2001 From: awolverp Date: Mon, 1 Jun 2026 15:04:05 +0330 Subject: [PATCH 53/60] Update github workflows --- .github/workflows/python-test.yml | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index 2d90d9d..d5f38df 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -16,11 +16,6 @@ env: jobs: 
test-python: - if: >- - contains(github.event.head_commit.message, '!test') || - github.event_name == 'workflow_dispatch' || - github.event_name == 'pull_request' - name: test ${{ matrix.python-version }} runs-on: ubuntu-latest continue-on-error: ${{ endsWith(matrix.python-version, 't') }} @@ -55,11 +50,6 @@ jobs: HYPOTHESIS_PROFILE: slow test-os: - if: >- - contains(github.event.head_commit.message, '!test') || - github.event_name == 'workflow_dispatch' || - github.event_name == 'pull_request' - name: test on ${{ matrix.os }} runs-on: ${{ matrix.os }}-latest strategy: From d4477383a3122d94f92b06bc872a981bf7a72667 Mon Sep 17 00:00:00 2001 From: awolverp Date: Mon, 1 Jun 2026 15:14:54 +0330 Subject: [PATCH 54/60] Fix typing errors --- cachebox/_cachebox.py | 8 ++++---- cachebox/_core.pyi | 4 ++-- cachebox/utils.py | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cachebox/_cachebox.py b/cachebox/_cachebox.py index c62b46c..b8462ef 100644 --- a/cachebox/_cachebox.py +++ b/cachebox/_cachebox.py @@ -117,10 +117,10 @@ def __init__( self, maxsize: int, global_ttl: float | timedelta, - iterable: _IterableType[KT, VT] | None = None, + iterable: typing.Optional["_IterableType[KT, VT]"] = None, *, capacity: int = 0, - getsizeof: typing.Callable[[KT, VT]] | None = None, + getsizeof: typing.Callable[[KT, VT], int] | None = None, sweep_interval: float | timedelta | None = None, ) -> None: """ @@ -289,11 +289,11 @@ class VTTLCache(_CoreVTTLCache[KT, VT]): def __init__( self, maxsize: int, - iterable: _IterableType[KT, VT] | None = None, + iterable: typing.Optional["_IterableType[KT, VT]"] = None, ttl: float | timedelta | datetime | None = None, *, capacity: int = 0, - getsizeof: typing.Callable[[KT, VT]] | None = None, + getsizeof: typing.Callable[[KT, VT], int] | None = None, sweep_interval: float | timedelta | None = None, ) -> None: """ diff --git a/cachebox/_core.pyi b/cachebox/_core.pyi index d27b9cf..3e80741 100644 --- a/cachebox/_core.pyi +++ 
b/cachebox/_core.pyi @@ -1229,7 +1229,7 @@ class TTLCache(BaseCacheImpl[KT, VT]): iterable: _IterableType[KT, VT] | None = None, *, capacity: int = 0, - getsizeof: typing.Callable[[KT, VT]] | None = None, + getsizeof: typing.Callable[[KT, VT], int] | None = None, ) -> None: """ Initializes a new TTLCache instance. @@ -1466,7 +1466,7 @@ class VTTLCache(BaseCacheImpl[KT, VT]): ttl: float | timedelta | datetime | None = None, *, capacity: int = 0, - getsizeof: typing.Callable[[KT, VT]] | None = None, + getsizeof: typing.Callable[[KT, VT], int] | None = None, ) -> None: """ Initializes a new TTLCache instance. diff --git a/cachebox/utils.py b/cachebox/utils.py index 01f7139..c7fd21b 100644 --- a/cachebox/utils.py +++ b/cachebox/utils.py @@ -267,7 +267,7 @@ def __setitem__(self, key: KT, value: VT) -> None: def update( self, - iterable: _IterableType[KT, VT], + iterable: "_IterableType[KT, VT]", *args: typing.Any, **kwargs: typing.Any, ) -> None: From d7231e353c7c09ccf7b52878aff16650433b1ad3 Mon Sep 17 00:00:00 2001 From: awolverp Date: Mon, 1 Jun 2026 15:20:26 +0330 Subject: [PATCH 55/60] Fix PyPy; PyPy doesn't have PyTuple_SET_ITEM and we should use PyTuple_SetItem instead --- src/internal/utils.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/internal/utils.rs b/src/internal/utils.rs index e239fdd..0724816 100644 --- a/src/internal/utils.rs +++ b/src/internal/utils.rs @@ -81,11 +81,11 @@ pub unsafe fn call_getsizeof( return Err(pyo3::PyErr::fetch(py)); } - // PyTuple_SET_ITEM steals the reference, so we need to increment first. + // PyTuple_SetItem steals the reference, so we need to increment first. 
pyo3::ffi::Py_INCREF(key); pyo3::ffi::Py_INCREF(value); - pyo3::ffi::PyTuple_SET_ITEM(args, 0, key); - pyo3::ffi::PyTuple_SET_ITEM(args, 1, value); + pyo3::ffi::PyTuple_SetItem(args, 0, key); + pyo3::ffi::PyTuple_SetItem(args, 1, value); let result = pyo3::ffi::PyObject_Call(getsizeof.as_ptr(), args, std::ptr::null_mut()); pyo3::ffi::Py_DECREF(args); From 5b387d79b9d6bdbc391a6f1f7283bdde6d67ef51 Mon Sep 17 00:00:00 2001 From: awolverp Date: Mon, 1 Jun 2026 16:31:23 +0330 Subject: [PATCH 56/60] Fix FIFOCache & TTLCache internal decrement_indexes function --- src/policies/fifopolicy.rs | 42 ++++++++++++++++----------------- src/policies/ttlpolicy.rs | 48 +++++++++++++++++++++----------------- tests/test_impls.py | 39 +++++++++++++++++++++++++------ 3 files changed, 79 insertions(+), 50 deletions(-) diff --git a/src/policies/fifopolicy.rs b/src/policies/fifopolicy.rs index 67abb28..1ae9a59 100644 --- a/src/policies/fifopolicy.rs +++ b/src/policies/fifopolicy.rs @@ -165,7 +165,6 @@ impl FIFOPolicy { #[cfg(not(feature = "small-offset"))] const MAX_FRONT_OFFSET: usize = usize::MAX - isize::MAX as usize; - // Use u8::MAX as maximum front offset, useful for tests #[cfg(feature = "small-offset")] const MAX_FRONT_OFFSET: usize = u8::MAX as usize; @@ -176,44 +175,45 @@ impl FIFOPolicy { return; } - // Snapshot so the borrow checker doesn't complain about `self` inside the loops. - let fo = self.front_offset; - - if (end - start) > self.table.num_buckets() / 2 { - // Table-scan path: already O(n), so fold normalization in for free. - // One pass: normalize every index (subtract fo) and decrement those in [start, end). + if (end - start) > self.table.capacity() / 2 { + // Table-scan + // normalize every index (subtract fo) and decrement those in range [start, end). 
unsafe { for bucket in self.table.iter() { let i = bucket.as_mut(); - let vd_idx = *i - fo; // raw VecDeque index + + let vd_idx = *i - self.front_offset; + *i = if start <= vd_idx && vd_idx < end { vd_idx - 1 // normalize + decrement } else { - vd_idx // normalize only + vd_idx // normalize }; } } } else { - // Entries-scan path: O(range) decrement pass, then O(n) normalization pass. - // - // Pass 1: decrement the logical indices for entries in [start, end). + // Entries-scan + // decrement the logical indices for entries in range [start, end). let shifted = self.entries.range(start..end); for (i, entry) in (start..end).zip(shifted) { let result = unsafe { self.table - .get_mut(entry.key().hash(), |x| Ok::<_, pyo3::PyErr>((*x) - fo == i)) + .get_mut(entry.key().hash(), |x| { + Ok::<_, pyo3::PyErr>((*x) - self.front_offset == i) + }) .unwrap_unchecked() + .expect("index not found") }; - *result.expect("index not found") -= 1; + *result -= 1; } - // Pass 2: normalize every stored index by subtracting `fo`. - // • Entries in [start, end): (vd_idx + fo - 1) - fo = vd_idx - 1 - // • All others: (vd_idx + fo) - fo = vd_idx - if fo != 0 { + // normalize every stored index by subtracting `fo`. 
+ // - Entries in [start, end): (vd_idx + fo - 1) - fo = vd_idx - 1 + // - All others: (vd_idx + fo) - fo = vd_idx + if self.front_offset != 0 { unsafe { for bucket in self.table.iter() { - *bucket.as_mut() -= fo; + *bucket.as_mut() -= self.front_offset; } } } @@ -298,17 +298,17 @@ impl PolicyExt for FIFOPolicy { let front = unsafe { front.unwrap_unchecked() }; - let eq = |index: &usize| Ok::<_, pyo3::PyErr>((*index - self.front_offset) == 0); + let eq = |index: &usize| Ok::<_, pyo3::PyErr>(*index - self.front_offset == 0); if std::hint::unlikely(self.table.remove_entry(front.key().hash(), eq)?.is_none()) { unreachable!("popitem key not found in table"); } shared.generation_version().increment(); + self.decrement_indexes(1, self.entries.len()); let front = unsafe { self.entries.pop_front().unwrap_unchecked() }; self.currsize = self.currsize.saturating_sub(front.size()); - self.decrement_indexes(1, self.entries.len()); Ok(front) } diff --git a/src/policies/ttlpolicy.rs b/src/policies/ttlpolicy.rs index fa03175..4f74ee1 100644 --- a/src/policies/ttlpolicy.rs +++ b/src/policies/ttlpolicy.rs @@ -265,44 +265,45 @@ impl TTLPolicy { return; } - // Snapshot so the borrow checker doesn't complain about `self` inside the loops. - let fo = self.front_offset; - - if (end - start) > self.table.num_buckets() / 2 { - // Table-scan path: already O(n), so fold normalization in for free. - // One pass: normalize every index (subtract fo) and decrement those in [start, end). + if (end - start) > self.table.capacity() / 2 { + // Table-scan + // normalize every index (subtract fo) and decrement those in range [start, end). 
unsafe { for bucket in self.table.iter() { let i = bucket.as_mut(); - let vd_idx = *i - fo; // raw VecDeque index + + let vd_idx = *i - self.front_offset; + *i = if start <= vd_idx && vd_idx < end { vd_idx - 1 // normalize + decrement } else { - vd_idx // normalize only + vd_idx // normalize }; } } } else { - // Entries-scan path: O(range) decrement pass, then O(n) normalization pass. - // - // Pass 1: decrement the logical indices for entries in [start, end). + // Entries-scan + // decrement the logical indices for entries in range [start, end). let shifted = self.entries.range(start..end); for (i, entry) in (start..end).zip(shifted) { let result = unsafe { self.table - .get_mut(entry.key().hash(), |x| Ok::<_, pyo3::PyErr>((*x) - fo == i)) + .get_mut(entry.key().hash(), |x| { + Ok::<_, pyo3::PyErr>((*x) - self.front_offset == i) + }) .unwrap_unchecked() + .expect("index not found") }; - *result.expect("index not found") -= 1; + *result -= 1; } - // Pass 2: normalize every stored index by subtracting `fo`. - // • Entries in [start, end): (vd_idx + fo - 1) - fo = vd_idx - 1 - // • All others: (vd_idx + fo) - fo = vd_idx - if fo != 0 { + // normalize every stored index by subtracting `fo`. 
+ // - Entries in [start, end): (vd_idx + fo - 1) - fo = vd_idx - 1 + // - All others: (vd_idx + fo) - fo = vd_idx + if self.front_offset != 0 { unsafe { for bucket in self.table.iter() { - *bucket.as_mut() -= fo; + *bucket.as_mut() -= self.front_offset; } } } @@ -421,22 +422,25 @@ impl PolicyExt for TTLPolicy { } fn evict(&mut self, shared: &Self::Shared) -> pyo3::PyResult { - let front = self.entries.pop_front(); + let front = self.entries.front(); if front.is_none() { - return Err(new_py_error!(PyKeyError, "cache is empty")); + return Err(new_py_error!(PyKeyError, ())); } let front = unsafe { front.unwrap_unchecked() }; - let eq = |index: &usize| Ok::<_, pyo3::PyErr>((*index - self.front_offset) == 0); + let eq = |index: &usize| Ok::<_, pyo3::PyErr>(*index - self.front_offset == 0); if std::hint::unlikely(self.table.remove_entry(front.key().hash(), eq)?.is_none()) { unreachable!("popitem key not found in table"); } shared.generation_version().increment(); - self.currsize = self.currsize.saturating_sub(front.size()); self.decrement_indexes(1, self.entries.len()); + let front = unsafe { self.entries.pop_front().unwrap_unchecked() }; + + self.currsize = self.currsize.saturating_sub(front.size()); + Ok(front) } diff --git a/tests/test_impls.py b/tests/test_impls.py index f266c3b..73a313e 100644 --- a/tests/test_impls.py +++ b/tests/test_impls.py @@ -317,10 +317,21 @@ def test_edge_case_of_front_offset_overflow(self): U8_MAX = 255 CACHE_SIZE = 10 + total_insertions = U8_MAX + CACHE_SIZE # 265 + + # Phase 1 + cache = self.create_cache(CACHE_SIZE) + for i in range(total_insertions): + cache.insert(i, i * 10) + + # Call popitem 2 times + for i in range(total_insertions, total_insertions + 2): + cache.insert(i, i * 10) + + # Phase 2 cache = self.create_cache(CACHE_SIZE) # drive front_offset to the rebase boundary - total_insertions = U8_MAX + CACHE_SIZE # 265 for i in range(total_insertions): cache.insert(i, i * 10) @@ -1171,10 +1182,24 @@ def 
test_edge_case_of_front_offset_overflow(self): U8_MAX = 255 CACHE_SIZE = 10 + U8_MAX = 255 + CACHE_SIZE = 10 + + total_insertions = U8_MAX + CACHE_SIZE # 265 + + # Phase 1 + cache = self.create_cache(CACHE_SIZE) + for i in range(total_insertions): + cache.insert(i, i * 10) + + # Call popitem 2 times + for i in range(total_insertions, total_insertions + 2): + cache.insert(i, i * 10) + + # Phase 2 cache = self.create_cache(CACHE_SIZE) # drive front_offset to the rebase boundary - total_insertions = U8_MAX + CACHE_SIZE # 265 for i in range(total_insertions): cache.insert(i, i * 10) @@ -1465,10 +1490,10 @@ def test_mixed_ttl_and_no_ttl(self): def test_multiple_items_expire_independently(self): c = self.create_cache() - c.insert("a", 1, ttl=0.1) - c.insert("b", 2, ttl=0.2) - c.insert("c", 3, ttl=0.3) - time.sleep(0.15) + c.insert("a", 1, ttl=0.2) + c.insert("b", 2, ttl=0.6) + c.insert("c", 3, ttl=1) + time.sleep(0.2) assert "a" not in c assert "b" in c assert "c" in c @@ -1480,7 +1505,7 @@ def test_reinsertion_resets_ttl(self): c = self.create_cache() c.insert("k", "v1", ttl=0.2) time.sleep(0.1) - c.insert("k", "v2", ttl=0.2) # reset + c.insert("k", "v2", ttl=0.3) time.sleep(0.15) # original TTL would have expired; new one should not assert "k" in c From cb7befe22eae0113f34c04629296cda890ab69be Mon Sep 17 00:00:00 2001 From: awolverp Date: Mon, 1 Jun 2026 16:38:06 +0330 Subject: [PATCH 57/60] Fix tests --- tests/test_impls.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_impls.py b/tests/test_impls.py index 73a313e..e1c8149 100644 --- a/tests/test_impls.py +++ b/tests/test_impls.py @@ -1497,7 +1497,7 @@ def test_multiple_items_expire_independently(self): assert "a" not in c assert "b" in c assert "c" in c - time.sleep(0.1) + time.sleep(0.4) assert "b" not in c assert "c" in c From 13321bc917fce48653885da0f59c642e8aed6c5a Mon Sep 17 00:00:00 2001 From: awolverp Date: Mon, 1 Jun 2026 19:07:58 +0330 Subject: [PATCH 58/60] Improve 
FIFOCache/TTLCache tests --- .github/workflows/python-test.yml | 4 +- Cargo.toml | 4 +- cachebox/__init__.py | 2 +- cachebox/_core.pyi | 2 +- src/lib.rs | 8 +- src/policies/fifopolicy.rs | 4 +- src/policies/ttlpolicy.rs | 4 +- tests/test_impls.py | 139 ++++++++++++++++++++++++------ 8 files changed, 130 insertions(+), 37 deletions(-) diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index d5f38df..ba967d1 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -44,7 +44,7 @@ jobs: - run: pip install virtualenv pytest pytest-asyncio hypothesis maturin - run: virtualenv --no-vcs-ignore .venv - - run: maturin develop --features small-offset + - run: maturin develop --features use-small-offset - run: pytest -v env: HYPOTHESIS_PROFILE: slow @@ -69,7 +69,7 @@ jobs: - run: pip install virtualenv pytest pytest-asyncio hypothesis maturin - run: virtualenv --no-vcs-ignore .venv - - run: maturin develop --features small-offset + - run: maturin develop --features use-small-offset - run: pytest -v env: HYPOTHESIS_PROFILE: slow diff --git a/Cargo.toml b/Cargo.toml index e678008..d7c886a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,7 +36,9 @@ pyo3-build-config = {version="0.28.3", default-features=false, features=["resolv default = ["inline-more", "extension-module"] inline-more = [] extension-module = ["pyo3/extension-module"] -small-offset = [] + +# testing features +use-small-offset = [] [lints.clippy] dbg_macro = "warn" diff --git a/cachebox/__init__.py b/cachebox/__init__.py index ca3b6d9..da11c37 100644 --- a/cachebox/__init__.py +++ b/cachebox/__init__.py @@ -7,7 +7,7 @@ from ._cachebox import TTLCache as TTLCache from ._cachebox import VTTLCache as VTTLCache from ._core import __version__ as __version__ -from ._core import _small_offset_feature as _small_offset_feature +from ._core import _use_small_offset_feature as _use_small_offset_feature from .utils import EVENT_HIT as EVENT_HIT from .utils import 
EVENT_MISS as EVENT_MISS from .utils import Frozen as Frozen diff --git a/cachebox/_core.pyi b/cachebox/_core.pyi index 3e80741..72861da 100644 --- a/cachebox/_core.pyi +++ b/cachebox/_core.pyi @@ -3,7 +3,7 @@ from datetime import datetime, timedelta from _typeshed import SupportsItems -_small_offset_feature: typing.Final[bool] +_use_small_offset_feature: typing.Final[bool] __version__: typing.Final[str] KT = typing.TypeVar("KT", bound=typing.Hashable) diff --git a/src/lib.rs b/src/lib.rs index e142c6f..a9d1f94 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -84,11 +84,11 @@ mod _core { m.add("__version__", env!("CARGO_PKG_VERSION"))?; - #[cfg(feature = "small-offset")] - m.add("_small_offset_feature", true)?; + #[cfg(feature = "use-small-offset")] + m.add("_use_small_offset_feature", true)?; - #[cfg(not(feature = "small-offset"))] - m.add("_small_offset_feature", false)?; + #[cfg(not(feature = "use-small-offset"))] + m.add("_use_small_offset_feature", false)?; Ok(()) } diff --git a/src/policies/fifopolicy.rs b/src/policies/fifopolicy.rs index 1ae9a59..31e1632 100644 --- a/src/policies/fifopolicy.rs +++ b/src/policies/fifopolicy.rs @@ -162,10 +162,10 @@ impl FIFOPolicy { #[inline] fn decrement_indexes(&mut self, start: usize, end: usize) { - #[cfg(not(feature = "small-offset"))] + #[cfg(not(feature = "use-small-offset"))] const MAX_FRONT_OFFSET: usize = usize::MAX - isize::MAX as usize; - #[cfg(feature = "small-offset")] + #[cfg(feature = "use-small-offset")] const MAX_FRONT_OFFSET: usize = u8::MAX as usize; // Fast path: shifting the entire front is a single counter increment. 
diff --git a/src/policies/ttlpolicy.rs b/src/policies/ttlpolicy.rs index 4f74ee1..74a67e9 100644 --- a/src/policies/ttlpolicy.rs +++ b/src/policies/ttlpolicy.rs @@ -252,10 +252,10 @@ impl TTLPolicy { #[inline] fn decrement_indexes(&mut self, start: usize, end: usize) { - #[cfg(not(feature = "small-offset"))] + #[cfg(not(feature = "use-small-offset"))] const MAX_FRONT_OFFSET: usize = usize::MAX - isize::MAX as usize; - #[cfg(feature = "small-offset")] + #[cfg(feature = "use-small-offset")] const MAX_FRONT_OFFSET: usize = u8::MAX as usize; // Fast path: shifting the entire front is a single counter increment. diff --git a/tests/test_impls.py b/tests/test_impls.py index e1c8149..b21e14f 100644 --- a/tests/test_impls.py +++ b/tests/test_impls.py @@ -306,32 +306,79 @@ def test_clear_resets_fifo_order(self): assert cache.last() == 30 @pytest.mark.skipif( - not cachebox._small_offset_feature, - reason="requires small-offset feature flag", + not cachebox._use_small_offset_feature, + reason="requires use-small-offset feature flag", ) - def test_edge_case_of_front_offset_overflow(self): + def test_edge_case_of_front_offset_overflow_entries_scan(self): """ Verifies that FIFOCache correctly rebases its internal `front_offset` - counter when it approaches `u8::MAX` (255 in the small-offset test build). + counter when it approaches `u8::MAX` (255 in the use-small-offset test build). 
""" U8_MAX = 255 - CACHE_SIZE = 10 + CACHE_SIZE = 2 - total_insertions = U8_MAX + CACHE_SIZE # 265 - - # Phase 1 + # Phase 2 cache = self.create_cache(CACHE_SIZE) + + # drive front_offset to the rebase boundary + total_insertions = U8_MAX + CACHE_SIZE # 265 for i in range(total_insertions): cache.insert(i, i * 10) - # Call popitem 2 times - for i in range(total_insertions, total_insertions + 2): - cache.insert(i, i * 10) + # Snapshot what *should* be alive: the last CACHE_SIZE keys inserted + expected_keys = set(range(total_insertions - CACHE_SIZE, total_insertions)) + + # verify the cache is structurally sound after the rebase + assert len(cache) == CACHE_SIZE + assert cache.is_full() + + # Exact contents — no phantom or missing keys + assert set(cache.keys()) == expected_keys + + # FIFO ordering must be intact + assert cache.first() == min(expected_keys) + assert cache.last() == max(expected_keys) + + # All surviving values are correct + for key in expected_keys: + assert cache[key] == key * 10 + + # All evicted keys are truly gone + for evicted in range(total_insertions - CACHE_SIZE): + assert evicted not in cache + + # Prove the cache keeps working normally after the rebase + + # New insertions must evict the oldest surviving key (min of expected_keys) + next_key = total_insertions # 265 + oldest_before = cache.first() + cache.insert(next_key, next_key * 10) + + assert oldest_before not in cache # oldest was evicted + assert cache[next_key] == next_key * 10 # new entry is present + assert cache.last() == next_key # sits at the tail + assert len(cache) == CACHE_SIZE # size is unchanged + + # Ordering of the remainder is still correct + assert cache.first() == min(expected_keys) + 1 + + # popitem() must still yield the oldest entry + oldest_key, oldest_val = cache.popitem() + assert oldest_val == oldest_key * 10 + + @pytest.mark.skipif( + not cachebox._use_small_offset_feature, + reason="requires use-small-offset feature flag", + ) + def 
test_edge_case_of_front_offset_overflow_table_scan(self): + U8_MAX = 255 + CACHE_SIZE = 20 # Phase 2 cache = self.create_cache(CACHE_SIZE) # drive front_offset to the rebase boundary + total_insertions = U8_MAX + CACHE_SIZE # 265 for i in range(total_insertions): cache.insert(i, i * 10) @@ -1171,35 +1218,79 @@ def test_clear_resets_fifo_order(self): assert cache.last() == 30 @pytest.mark.skipif( - not cachebox._small_offset_feature, - reason="requires small-offset feature flag", + not cachebox._use_small_offset_feature, + reason="requires use-small-offset feature flag", ) - def test_edge_case_of_front_offset_overflow(self): + def test_edge_case_of_front_offset_overflow_entries_scan(self): """ Verifies that FIFOCache correctly rebases its internal `front_offset` - counter when it approaches `u8::MAX` (255 in the small-offset test build). + counter when it approaches `u8::MAX` (255 in the use-small-offset test build). """ U8_MAX = 255 - CACHE_SIZE = 10 + CACHE_SIZE = 2 - U8_MAX = 255 - CACHE_SIZE = 10 + # Phase 2 + cache = self.create_cache(CACHE_SIZE) + # drive front_offset to the rebase boundary total_insertions = U8_MAX + CACHE_SIZE # 265 - - # Phase 1 - cache = self.create_cache(CACHE_SIZE) for i in range(total_insertions): cache.insert(i, i * 10) - # Call popitem 2 times - for i in range(total_insertions, total_insertions + 2): - cache.insert(i, i * 10) + # Snapshot what *should* be alive: the last CACHE_SIZE keys inserted + expected_keys = set(range(total_insertions - CACHE_SIZE, total_insertions)) + + # verify the cache is structurally sound after the rebase + assert len(cache) == CACHE_SIZE + assert cache.is_full() + + # Exact contents — no phantom or missing keys + assert set(cache.keys()) == expected_keys + + # FIFO ordering must be intact + assert cache.first() == min(expected_keys) + assert cache.last() == max(expected_keys) + + # All surviving values are correct + for key in expected_keys: + assert cache[key] == key * 10 + + # All evicted keys are truly 
gone + for evicted in range(total_insertions - CACHE_SIZE): + assert evicted not in cache + + # Prove the cache keeps working normally after the rebase + + # New insertions must evict the oldest surviving key (min of expected_keys) + next_key = total_insertions # 265 + oldest_before = cache.first() + cache.insert(next_key, next_key * 10) + + assert oldest_before not in cache # oldest was evicted + assert cache[next_key] == next_key * 10 # new entry is present + assert cache.last() == next_key # sits at the tail + assert len(cache) == CACHE_SIZE # size is unchanged + + # Ordering of the remainder is still correct + assert cache.first() == min(expected_keys) + 1 + + # popitem() must still yield the oldest entry + oldest_key, oldest_val = cache.popitem() + assert oldest_val == oldest_key * 10 + + @pytest.mark.skipif( + not cachebox._use_small_offset_feature, + reason="requires use-small-offset feature flag", + ) + def test_edge_case_of_front_offset_overflow_table_scan(self): + U8_MAX = 255 + CACHE_SIZE = 20 # Phase 2 cache = self.create_cache(CACHE_SIZE) # drive front_offset to the rebase boundary + total_insertions = U8_MAX + CACHE_SIZE # 265 for i in range(total_insertions): cache.insert(i, i * 10) From d4248009fe79c46d692552d65b965a2a8f294550 Mon Sep 17 00:00:00 2001 From: awolverp Date: Mon, 1 Jun 2026 19:40:16 +0330 Subject: [PATCH 59/60] Add mkdocs github workflow --- .github/workflows/docs.yml | 48 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 .github/workflows/docs.yml diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000..8c10711 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,48 @@ +name: Deploy Docs + +on: + workflow_dispatch: + +permissions: + contents: write + pages: write + id-token: write + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v6.0.2 + with: + fetch-depth: 0 + + - uses: actions/setup-python@v6.2.0 + with: + 
python-version: 3.14 + + - run: pip install "mkdocs-material" "mkdocstrings[python]" + - run: mkdocs gh-deploy --config-file docs/mkdocs.yml --force + + deploy_mkdocs: + needs: build + + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v6.0.2 + with: + ref: gh-pages + + - uses: actions/configure-pages@v6.0.0 + + - uses: actions/upload-pages-artifact@v5.0.0 + with: + path: "." + + - id: deployment + uses: actions/deploy-pages@v5.0.0 From 7f8f3d442c11e5850d43e9e1ae985d925fa97e8b Mon Sep 17 00:00:00 2001 From: awolverp Date: Tue, 2 Jun 2026 14:01:28 +0330 Subject: [PATCH 60/60] Update docs --- cachebox/_cachebox.py | 58 ++++++++-------- cachebox/_core.pyi | 124 +++++++++++++++++------------------ docs/docs/getting-started.md | 112 ++++++++++++++++++++++++++++--- docs/docs/tips.md | 10 ++- docs/mkdocs.yml | 9 ++- 5 files changed, 203 insertions(+), 110 deletions(-) diff --git a/cachebox/_cachebox.py b/cachebox/_cachebox.py index b8462ef..def0a30 100644 --- a/cachebox/_cachebox.py +++ b/cachebox/_cachebox.py @@ -95,22 +95,22 @@ class TTLCache(_CoreTTLCache[KT, VT]): per-entry TTL granularity is required (consider ``VTTLCache`` instead), or when the system clock is unreliable or subject to adjustment. 
- Example:: + ```python + from cachebox import TTLCache + import time - from cachebox import TTLCache - import time + cache = TTLCache(0, global_ttl=2) + cache.update({i:str(i) for i in range(10)}) - cache = TTLCache(0, global_ttl=2) - cache.update({i:str(i) for i in range(10)}) + print(cache.get_with_expire(2)) # ('2', 1.99) - print(cache.get_with_expire(2)) # ('2', 1.99) + # Returns the oldest key in cache; this is the one which will be removed by `popitem()` + print(cache.first()) # 0 - # Returns the oldest key in cache; this is the one which will be removed by `popitem()` - print(cache.first()) # 0 - - cache["mykey"] = "value" - time.sleep(2) - cache["mykey"] # KeyError + cache["mykey"] = "value" + time.sleep(2) + cache["mykey"] # KeyError + ``` """ def __init__( @@ -263,27 +263,27 @@ class VTTLCache(_CoreVTTLCache[KT, VT]): from temporarily lingering stale entries is unacceptable and a background thread is not an option. - Example:: - - from cachebox import VTTLCache - import time + ```python + from cachebox import VTTLCache + import time - cache = VTTLCache(100, iterable={i:i for i in range(4)}, ttl=3) - print(len(cache)) # 4 - time.sleep(3) - print(len(cache)) # 0 + cache = VTTLCache(100, iterable={i:i for i in range(4)}, ttl=3) + print(len(cache)) # 4 + time.sleep(3) + print(len(cache)) # 0 - # The "key1" is exists for 5 seconds - cache.insert("key1", "value", ttl=5) - # The "key2" is exists for 2 seconds - cache.insert("key2", "value", ttl=2) + # The "key1" is exists for 5 seconds + cache.insert("key1", "value", ttl=5) + # The "key2" is exists for 2 seconds + cache.insert("key2", "value", ttl=2) - time.sleep(2) - # "key1" is exists for 3 seconds - print(cache.get("key1")) # value + time.sleep(2) + # "key1" is exists for 3 seconds + print(cache.get("key1")) # value - # "key2" has expired - print(cache.get("key2")) # None + # "key2" has expired + print(cache.get("key2")) # None + ``` """ def __init__( diff --git a/cachebox/_core.pyi b/cachebox/_core.pyi 
index 72861da..f6f4917 100644 --- a/cachebox/_core.pyi +++ b/cachebox/_core.pyi @@ -268,25 +268,25 @@ class Cache(BaseCacheImpl[KT, VT]): Avoid it when cached data can become stale, when the working set is unpredictable in size, or when automatic memory pressure relief is needed. - Example:: + ```python + from cachebox import Cache - from cachebox import Cache + cache = Cache(maxsize=100, iterable=None, capacity=100) - cache = Cache(maxsize=100, iterable=None, capacity=100) + # behaves like a regular dict + cache["key"] = "value" + # using `.insert(key, value)` is recommended + cache.insert("key", "value") - # behaves like a regular dict - cache["key"] = "value" - # using `.insert(key, value)` is recommended - cache.insert("key", "value") + print(cache["key"]) # value - print(cache["key"]) # value + del cache["key"] + cache["key"] # KeyError: key - del cache["key"] - cache["key"] # KeyError: key - - # cachebox.Cache does not have any policy, so will raise OverflowError if the capacity is exceeded - cache.update({i:i for i in range(200)}) - # OverflowError: The cache has reached the bound. + # cachebox.Cache does not have any policy, so will raise OverflowError if the capacity is exceeded + cache.update({i:i for i in range(200)}) + # OverflowError: The cache has reached the bound. + ``` """ # | Class | get | insert | delete | popitem | @@ -457,23 +457,23 @@ class FIFOCache(BaseCacheImpl[KT, VT]): Avoid it when the workload has strong temporal locality; in those cases LRU or LFU will deliver meaningfully better hit rates. 
- Example:: - - from cachebox import FIFOCache + ```python + from cachebox import FIFOCache - cache = FIFOCache(5, {i:i*2 for i in range(5)}) + cache = FIFOCache(5, {i:i*2 for i in range(5)}) - print(len(cache)) # 5 - cache["new-key"] = "new-value" - print(len(cache)) # 5 + print(len(cache)) # 5 + cache["new-key"] = "new-value" + print(len(cache)) # 5 - print(cache.get(3, "default-val")) # 6 - print(cache.get(6, "default-val")) # default-val + print(cache.get(3, "default-val")) # 6 + print(cache.get(6, "default-val")) # default-val - print(cache.popitem()) # (1, 2) + print(cache.popitem()) # (1, 2) - # Returns the first key in cache; this is the one which will be removed by `popitem()`. - print(cache.first()) + # Returns the first key in cache; this is the one which will be removed by `popitem()`. + print(cache.first()) + ``` """ def insert(self, key: KT, value: VT) -> typing.Optional[VT]: @@ -636,16 +636,16 @@ class RRCache(BaseCacheImpl[KT, VT]): Avoid it when access patterns are highly skewed, cache hits are mission-critical, or fine-grained eviction control is required. - Example:: - - from cachebox import RRCache + ```python + from cachebox import RRCache - cache = RRCache(10, {i:i for i in range(10)}) - print(cache.is_full()) # True - print(cache.is_empty()) # False + cache = RRCache(10, {i:i for i in range(10)}) + print(cache.is_full()) # True + print(cache.is_empty()) # False - # Returns a random key - print(cache.random_key()) # 4 + # Returns a random key + print(cache.random_key()) # 4 + ``` """ def insert(self, key: KT, value: VT) -> typing.Optional[VT]: @@ -814,20 +814,20 @@ class LRUCache(BaseCacheImpl[KT, VT]): requirements, or frequency-heavy bimodal access patterns (consider LFU instead). 
- Example:: + ```python + from cachebox import LRUCache - from cachebox import LRUCache + cache = LRUCache(0, {i:i*2 for i in range(10)}) - cache = LRUCache(0, {i:i*2 for i in range(10)}) + # access `1` + print(cache[0]) # 0 + print(cache.least_recently_used()) # 1 + print(cache.popitem()) # (1, 2) - # access `1` - print(cache[0]) # 0 - print(cache.least_recently_used()) # 1 - print(cache.popitem()) # (1, 2) - - # .peek() searches for a key-value in the cache and returns it without moving the key to recently used. - print(cache.peek(2)) # 4 - print(cache.popitem()) # (3, 6) + # .peek() searches for a key-value in the cache and returns it without moving the key to recently used. + print(cache.peek(2)) # 4 + print(cache.popitem()) # (3, 6) + ``` """ def insert(self, key: KT, value: VT) -> typing.Optional[VT]: @@ -1029,28 +1029,28 @@ class LFUCache(BaseCacheImpl[KT, VT]): Avoid it when access patterns shift rapidly (use LRU instead) or when all keys are accessed with roughly equal probability. 
- Example:: - - from cachebox import LFUCache + ```python + from cachebox import LFUCache - cache = cachebox.LFUCache(5) - cache.insert('first', 'A') - cache.insert('second', 'B') + cache = cachebox.LFUCache(5) + cache.insert('first', 'A') + cache.insert('second', 'B') - # access 'first' twice - cache['first'] - cache['first'] + # access 'first' twice + cache['first'] + cache['first'] - # access 'second' once - cache['second'] + # access 'second' once + cache['second'] - assert cache.least_frequently_used() == 'second' - assert cache.least_frequently_used(2) is None # 2 is out of range + assert cache.least_frequently_used() == 'second' + assert cache.least_frequently_used(2) is None # 2 is out of range - for item in cache.items_with_frequency(): - print(item) - # ('second', 'B', 1) - # ('first', 'A', 2) + for item in cache.items_with_frequency(): + print(item) + # ('second', 'B', 1) + # ('first', 'A', 2) + ``` """ def insert(self, key: KT, value: VT) -> typing.Optional[VT]: diff --git a/docs/docs/getting-started.md b/docs/docs/getting-started.md index 527b003..c236054 100644 --- a/docs/docs/getting-started.md +++ b/docs/docs/getting-started.md @@ -16,7 +16,7 @@ def factorial(number: int) -> int: fact *= num return fact -assert factorial(5) == 125 +assert factorial(5) == 120 ``` The first parameter `cache`, you can specify the cache instance it should use for caching. @@ -33,7 +33,7 @@ def factorial(number: int) -> int: fact *= num return fact -assert factorial(5) == 125 +assert factorial(5) == 120 ``` ### Async Functions @@ -115,7 +115,7 @@ add(1, 2) # HIT key=(1, 2) !!! tip - May be a coroutine in async contexts. + `callback`s can be a coroutine in async contexts. 
### Setting a Postprocessor @@ -149,14 +149,30 @@ Ready to use postprocessors: - [postprocess_deepcopy function](api/utils.md#cachebox.utils.postprocess_deepcopy) - [postprocess_deepcopy_mutables function](api/utils.md#cachebox.utils.postprocess_deepcopy_mutables) -### Bypass the Cache for a Single Call -Pass `cachebox__ignore=True` to skip the cache entirely: +### Bypass the Cache for a Call +Sometimes you need to execute the wrapped function without reading from or writing to the cache. +Pass `cachebox__ignore=True` when calling the function: ```python -result = my_func(10, 20, cachebox__ignore=True) +import cachebox + +@cachebox.cached(cachebox.LRUCache(128)) +def add(a, b): + print("computing...") + return a + b + +add(1, 2) # computing... +add(1, 2) # returned from cache + +add(1, 2, cachebox__ignore=True) +# computing... ``` -### Cache on an Instance Method +This affects only the current call. Future calls continue to use the cache normally. + +### Caching Methods + +For instance methods, each object often needs its own cache. The cache can be stored on the instance and provided dynamically using a callable. ```python hl_lines="6 8" import cachebox @@ -164,16 +180,94 @@ import cachebox class MyService: def __init__(self, multiplier: int): self.multiplier = multiplier - self._cache = cachebox.TTLCache(20, ttl=10) + self._cache = cachebox.TTLCache(20, 10) @cachebox.cached(lambda self: self._cache) def compute(self, char: str): return char * self.multiplier svc = MyService(5) + assert svc.compute("a") == "aaaaa" +assert svc.compute("a") == "aaaaa" # cached +``` + +Using a cache stored on the instance ensures that each object maintains its own cached values: + +```python +svc1 = MyService(2) +svc2 = MyService(5) + +assert svc1.compute("x") == "xx" +assert svc2.compute("x") == "xxxxx" +``` + +Because each instance has a separate cache, entries created by `svc1` are not visible to `svc2`. 
+ +### Caching `@staticmethod`s +`@staticmethod`s behave like normal functions attached to a class. Since they do not receive `self` or `cls`, you can provide a cache instance directly. + +```python +import cachebox + +class TextUtils: + @staticmethod + @cachebox.cached(cachebox.LRUCache(128)) + def normalize(text: str) -> str: + print("normalizing...") + return text.strip().lower() + +TextUtils.normalize(" Hello ") +TextUtils.normalize(" Hello ") # cached ``` +The cache is shared by all callers because the method does not belong to a specific instance. + +### Caching `@classmethod`s +`@classmethod`s receive the class (`cls`) as their first argument. +The cache can be shared across the class or selected dynamically based on the class. + +```python +import cachebox + +class UserRepository: + _cache = cachebox.LRUCache(128) + + @classmethod + @cachebox.cached(lambda cls: cls._cache) + def get_user(cls, user_id: int): + print("loading user...") + return {"id": user_id} + +UserRepository.get_user(1) +UserRepository.get_user(1) # cached +``` + +This pattern is useful when the cache should be associated with the class itself rather than with +individual instances. +Class methods can also be used with inheritance. Each subclass may provide its own cache: + +```python +import cachebox + +class BaseRepository: + _cache = cachebox.LRUCache(128) + + @classmethod + @cachebox.cached(lambda cls: cls._cache) + def get_item(cls, item_id): + return f"{cls.__name__}:{item_id}" + +class ProductRepository(BaseRepository): + _cache = cachebox.LRUCache(128) + +class OrderRepository(BaseRepository): + _cache = cachebox.LRUCache(128) +``` + +In this example, each repository class maintains an independent cache while reusing +the same cached method implementation. + ## Using a Cache Implemetations You can use all cache implementations without `@cached` method. 
You only need to import the classes you want and can work with them like a regular dictionaries @@ -188,6 +282,8 @@ assert cache["key"] == "value" assert cache.get("missing", "default") == "default" ``` +You can see examples of each cache implementation in [API Reference](api/impls.md). Also these examples are exist in their docstrings. + ## Immutable (Frozen) Cache Wrap any cache with `Frozen` to prevent further writes: diff --git a/docs/docs/tips.md b/docs/docs/tips.md index 2cdc252..44dfcd3 100644 --- a/docs/docs/tips.md +++ b/docs/docs/tips.md @@ -22,6 +22,10 @@ assert cache == loaded assert cache.capacity() == loaded.capacity() ``` +!!! note + + Don't set `lambda` as `getsizeof` for caches when you want to pickle them. + ## Copying a Cache All cache classes support Python's `copy` module, both shallow-copy and deep-copy: @@ -35,12 +39,6 @@ shallow = copy.copy(cache) # shallow copy deep = copy.deepcopy(cache) # deep copy ``` -## Avoiding Cache Stampede -Cachebox uses a distributed lock system internally to prevent the -[cache stampede](https://en.wikipedia.org/wiki/Cache_stampede) problem — -multiple concurrent requests recomputing the same missing entry simultaneously. -No additional configuration is required. - ## Pre-allocating Capacity If you know roughly how many items a cache will hold, set `capacity` to avoid hash table rehashing during initial population: diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index bd0798e..2e4f795 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -87,10 +87,9 @@ nav: - Home: index.md - Installation: installation.md - Getting Started: getting-started.md + - Tips & Notes: tips.md + - Migration Guide: migration.md - API Reference: - - Overview: api/index.md - - Implementations: api/impls.md + - API Reference: api/index.md + - Classes: api/impls.md - Utilities: api/utils.md - - - Migration Guide: migration.md - - Tips & Notes: tips.md