From 1272515aa1e3e71ccedcbeeefebcbc3d8e4c32bf Mon Sep 17 00:00:00 2001 From: Asif Sayyed Date: Sat, 4 Apr 2026 11:59:32 +0530 Subject: [PATCH 01/22] #1 chore: set up poetry --- poetry.lock | 554 +++++++++++++++++++++++++++++++++++++++++++++++++ pyproject.toml | 26 +++ 2 files changed, 580 insertions(+) create mode 100644 poetry.lock create mode 100644 pyproject.toml diff --git a/poetry.lock b/poetry.lock new file mode 100644 index 0000000..84c43ed --- /dev/null +++ b/poetry.lock @@ -0,0 +1,554 @@ +# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. + +[[package]] +name = "astroid" +version = "4.0.4" +description = "An abstract syntax tree for Python with inference support." +optional = false +python-versions = ">=3.10.0" +groups = ["dev"] +files = [ + {file = "astroid-4.0.4-py3-none-any.whl", hash = "sha256:52f39653876c7dec3e3afd4c2696920e05c83832b9737afc21928f2d2eb7a753"}, + {file = "astroid-4.0.4.tar.gz", hash = "sha256:986fed8bcf79fb82c78b18a53352a0b287a73817d6dbcfba3162da36667c49a0"}, +] + +[[package]] +name = "black" +version = "26.3.1" +description = "The uncompromising code formatter." +optional = false +python-versions = ">=3.10" +groups = ["dev"] +files = [ + {file = "black-26.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:86a8b5035fce64f5dcd1b794cf8ec4d31fe458cf6ce3986a30deb434df82a1d2"}, + {file = "black-26.3.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5602bdb96d52d2d0672f24f6ffe5218795736dd34807fd0fd55ccd6bf206168b"}, + {file = "black-26.3.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6c54a4a82e291a1fee5137371ab488866b7c86a3305af4026bdd4dc78642e1ac"}, + {file = "black-26.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:6e131579c243c98f35bce64a7e08e87fb2d610544754675d4a0e73a070a5aa3a"}, + {file = "black-26.3.1-cp310-cp310-win_arm64.whl", hash = "sha256:5ed0ca58586c8d9a487352a96b15272b7fa55d139fc8496b519e78023a8dab0a"}, + {file = "black-26.3.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:28ef38aee69e4b12fda8dba75e21f9b4f979b490c8ac0baa7cb505369ac9e1ff"}, + {file = "black-26.3.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bf9bf162ed91a26f1adba8efda0b573bc6924ec1408a52cc6f82cb73ec2b142c"}, + {file = "black-26.3.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:474c27574d6d7037c1bc875a81d9be0a9a4f9ee95e62800dab3cfaadbf75acd5"}, + {file = "black-26.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:5e9d0d86df21f2e1677cc4bd090cd0e446278bcbbe49bf3659c308c3e402843e"}, + {file = "black-26.3.1-cp311-cp311-win_arm64.whl", hash = "sha256:9a5e9f45e5d5e1c5b5c29b3bd4265dcc90e8b92cf4534520896ed77f791f4da5"}, + {file = "black-26.3.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b5e6f89631eb88a7302d416594a32faeee9fb8fb848290da9d0a5f2903519fc1"}, + {file = "black-26.3.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:41cd2012d35b47d589cb8a16faf8a32ef7a336f56356babd9fcf70939ad1897f"}, + {file = "black-26.3.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f76ff19ec5297dd8e66eb64deda23631e642c9393ab592826fd4bdc97a4bce7"}, + {file = "black-26.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:ddb113db38838eb9f043623ba274cfaf7d51d5b0c22ecb30afe58b1bb8322983"}, + {file = "black-26.3.1-cp312-cp312-win_arm64.whl", hash = "sha256:dfdd51fc3e64ea4f35873d1b3fb25326773d55d2329ff8449139ebaad7357efb"}, + {file = "black-26.3.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:855822d90f884905362f602880ed8b5df1b7e3ee7d0db2502d4388a954cc8c54"}, + {file = "black-26.3.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8a33d657f3276328ce00e4d37fe70361e1ec7614da5d7b6e78de5426cb56332f"}, + {file = "black-26.3.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f1cd08e99d2f9317292a311dfe578fd2a24b15dbce97792f9c4d752275c1fa56"}, + {file = "black-26.3.1-cp313-cp313-win_amd64.whl", hash = "sha256:c7e72339f841b5a237ff14f7d3880ddd0fc7f98a1199e8c4327f9a4f478c1839"}, + {file = "black-26.3.1-cp313-cp313-win_arm64.whl", hash = "sha256:afc622538b430aa4c8c853f7f63bc582b3b8030fd8c80b70fb5fa5b834e575c2"}, + {file = "black-26.3.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:2d6bfaf7fd0993b420bed691f20f9492d53ce9a2bcccea4b797d34e947318a78"}, + {file = "black-26.3.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:f89f2ab047c76a9c03f78d0d66ca519e389519902fa27e7a91117ef7611c0568"}, + {file = "black-26.3.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b07fc0dab849d24a80a29cfab8d8a19187d1c4685d8a5e6385a5ce323c1f015f"}, + {file = "black-26.3.1-cp314-cp314-win_amd64.whl", hash = "sha256:0126ae5b7c09957da2bdbd91a9ba1207453feada9e9fe51992848658c6c8e01c"}, + {file = "black-26.3.1-cp314-cp314-win_arm64.whl", hash = "sha256:92c0ec1f2cc149551a2b7b47efc32c866406b6891b0ee4625e95967c8f4acfb1"}, + {file = "black-26.3.1-py3-none-any.whl", hash = "sha256:2bd5aa94fc267d38bb21a70d7410a89f1a1d318841855f698746f8e7f51acd1b"}, + {file = "black-26.3.1.tar.gz", hash = "sha256:2c50f5063a9641c7eed7795014ba37b0f5fa227f3d408b968936e24bc0566b07"}, +] + +[package.dependencies] +click = ">=8.0.0" +mypy-extensions = ">=0.4.3" +packaging = ">=22.0" +pathspec = ">=1.0.0" +platformdirs = ">=2" +pytokens = ">=0.4.0,<0.5.0" + +[package.extras] +colorama = ["colorama (>=0.4.3)"] +d = ["aiohttp (>=3.10)"] +jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] +uvloop = ["uvloop (>=0.15.2) ; sys_platform != \"win32\"", "winloop (>=0.5.0) ; sys_platform == \"win32\""] + +[[package]] +name = "cfgv" +version = "3.5.0" +description = "Validate configuration and produce human readable error messages." +optional = false +python-versions = ">=3.10" +groups = ["dev"] +files = [ + {file = "cfgv-3.5.0-py2.py3-none-any.whl", hash = "sha256:a8dc6b26ad22ff227d2634a65cb388215ce6cc96bbcc5cfde7641ae87e8dacc0"}, + {file = "cfgv-3.5.0.tar.gz", hash = "sha256:d5b1034354820651caa73ede66a6294d6e95c1b00acc5e9b098e917404669132"}, +] + +[[package]] +name = "click" +version = "8.3.2" +description = "Composable command line interface toolkit" +optional = false +python-versions = ">=3.10" +groups = ["dev"] +files = [ + {file = "click-8.3.2-py3-none-any.whl", hash = "sha256:1924d2c27c5653561cd2cae4548d1406039cb79b858b747cfea24924bbc1616d"}, + {file = "click-8.3.2.tar.gz", hash = "sha256:14162b8b3b3550a7d479eafa77dfd3c38d9dc8951f6f69c78913a8f9a7540fd5"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +groups = ["dev"] +markers = "sys_platform == \"win32\" or platform_system == \"Windows\"" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "dill" +version = "0.4.1" +description = "serialize all of Python" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "dill-0.4.1-py3-none-any.whl", hash = "sha256:1e1ce33e978ae97fcfcff5638477032b801c46c7c65cf717f95fbc2248f79a9d"}, + {file = "dill-0.4.1.tar.gz", hash = "sha256:423092df4182177d4d8ba8290c8a5b640c66ab35ec7da59ccfa00f6fa3eea5fa"}, +] + +[package.extras] +graph = ["objgraph (>=1.7.2)"] +profile = ["gprof2dot (>=2022.7.29)"] + +[[package]] +name = "distlib" +version = "0.4.0" +description = "Distribution utilities" +optional = false +python-versions = "*" +groups = ["dev"] +files = [ + {file = "distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16"}, + {file = "distlib-0.4.0.tar.gz", hash = "sha256:feec40075be03a04501a973d81f633735b4b69f98b05450592310c0f401a4e0d"}, +] + +[[package]] +name = "filelock" +version = "3.25.2" +description = "A platform independent file lock." +optional = false +python-versions = ">=3.10" +groups = ["dev"] +files = [ + {file = "filelock-3.25.2-py3-none-any.whl", hash = "sha256:ca8afb0da15f229774c9ad1b455ed96e85a81373065fb10446672f64444ddf70"}, + {file = "filelock-3.25.2.tar.gz", hash = "sha256:b64ece2b38f4ca29dd3e810287aa8c48182bbecd1ae6e9ae126c9b35f1382694"}, +] + +[[package]] +name = "identify" +version = "2.6.18" +description = "File identification library for Python" +optional = false +python-versions = ">=3.10" +groups = ["dev"] +files = [ + {file = "identify-2.6.18-py2.py3-none-any.whl", hash = "sha256:8db9d3c8ea9079db92cafb0ebf97abdc09d52e97f4dcf773a2e694048b7cd737"}, + {file = "identify-2.6.18.tar.gz", hash = "sha256:873ac56a5e3fd63e7438a7ecbc4d91aca692eb3fefa4534db2b7913f3fc352fd"}, +] + +[package.extras] +license = ["ukkonen"] + +[[package]] +name = "iniconfig" +version = "2.3.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.10" +groups = ["dev"] +files = [ + {file = "iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12"}, + {file = "iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730"}, +] + +[[package]] +name = "isort" +version = "8.0.1" +description = "A Python utility / library to sort Python imports." +optional = false +python-versions = ">=3.10.0" +groups = ["dev"] +files = [ + {file = "isort-8.0.1-py3-none-any.whl", hash = "sha256:28b89bc70f751b559aeca209e6120393d43fbe2490de0559662be7a9787e3d75"}, + {file = "isort-8.0.1.tar.gz", hash = "sha256:171ac4ff559cdc060bcfff550bc8404a486fee0caab245679c2abe7cb253c78d"}, +] + +[package.extras] +colors = ["colorama"] + +[[package]] +name = "mccabe" +version = "0.7.0" +description = "McCabe checker, plugin for flake8" +optional = false +python-versions = ">=3.6" +groups = ["dev"] +files = [ + {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, + {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, +] + +[[package]] +name = "mypy-extensions" +version = "1.1.0" +description = "Type system extensions for programs checked with the mypy type checker." +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505"}, + {file = "mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558"}, +] + +[[package]] +name = "nodeenv" +version = "1.10.0" +description = "Node.js virtual environment builder" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +groups = ["dev"] +files = [ + {file = "nodeenv-1.10.0-py2.py3-none-any.whl", hash = "sha256:5bb13e3eed2923615535339b3c620e76779af4cb4c6a90deccc9e36b274d3827"}, + {file = "nodeenv-1.10.0.tar.gz", hash = "sha256:996c191ad80897d076bdfba80a41994c2b47c68e224c542b48feba42ba00f8bb"}, +] + +[[package]] +name = "packaging" +version = "26.0" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529"}, + {file = "packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4"}, +] + +[[package]] +name = "pathspec" +version = "1.0.4" +description = "Utility library for gitignore style pattern matching of file paths." +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "pathspec-1.0.4-py3-none-any.whl", hash = "sha256:fb6ae2fd4e7c921a165808a552060e722767cfa526f99ca5156ed2ce45a5c723"}, + {file = "pathspec-1.0.4.tar.gz", hash = "sha256:0210e2ae8a21a9137c0d470578cb0e595af87edaa6ebf12ff176f14a02e0e645"}, +] + +[package.extras] +hyperscan = ["hyperscan (>=0.7)"] +optional = ["typing-extensions (>=4)"] +re2 = ["google-re2 (>=1.1)"] +tests = ["pytest (>=9)", "typing-extensions (>=4.15)"] + +[[package]] +name = "platformdirs" +version = "4.9.4" +description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." +optional = false +python-versions = ">=3.10" +groups = ["dev"] +files = [ + {file = "platformdirs-4.9.4-py3-none-any.whl", hash = "sha256:68a9a4619a666ea6439f2ff250c12a853cd1cbd5158d258bd824a7df6be2f868"}, + {file = "platformdirs-4.9.4.tar.gz", hash = "sha256:1ec356301b7dc906d83f371c8f487070e99d3ccf9e501686456394622a01a934"}, +] + +[[package]] +name = "pluggy" +version = "1.6.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746"}, + {file = "pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["coverage", "pytest", "pytest-benchmark"] + +[[package]] +name = "pre-commit" +version = "4.5.1" +description = "A framework for managing and maintaining multi-language pre-commit hooks." +optional = false +python-versions = ">=3.10" +groups = ["dev"] +files = [ + {file = "pre_commit-4.5.1-py2.py3-none-any.whl", hash = "sha256:3b3afd891e97337708c1674210f8eba659b52a38ea5f822ff142d10786221f77"}, + {file = "pre_commit-4.5.1.tar.gz", hash = "sha256:eb545fcff725875197837263e977ea257a402056661f09dae08e4b149b030a61"}, +] + +[package.dependencies] +cfgv = ">=2.0.0" +identify = ">=1.0.0" +nodeenv = ">=0.11.1" +pyyaml = ">=5.1" +virtualenv = ">=20.10.0" + +[[package]] +name = "pygments" +version = "2.20.0" +description = "Pygments is a syntax highlighting package written in Python." +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "pygments-2.20.0-py3-none-any.whl", hash = "sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176"}, + {file = "pygments-2.20.0.tar.gz", hash = "sha256:6757cd03768053ff99f3039c1a36d6c0aa0b263438fcab17520b30a303a82b5f"}, +] + +[package.extras] +windows-terminal = ["colorama (>=0.4.6)"] + +[[package]] +name = "pylint" +version = "4.0.5" +description = "python code static checker" +optional = false +python-versions = ">=3.10.0" +groups = ["dev"] +files = [ + {file = "pylint-4.0.5-py3-none-any.whl", hash = "sha256:00f51c9b14a3b3ae08cff6b2cdd43f28165c78b165b628692e428fb1f8dc2cf2"}, + {file = "pylint-4.0.5.tar.gz", hash = "sha256:8cd6a618df75deb013bd7eb98327a95f02a6fb839205a6bbf5456ef96afb317c"}, +] + +[package.dependencies] +astroid = ">=4.0.2,<=4.1.dev0" +colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""} +dill = {version = ">=0.3.7", markers = "python_version >= \"3.12\""} +isort = ">=5,<5.13 || >5.13,<9" +mccabe = ">=0.6,<0.8" +platformdirs = ">=2.2" +tomlkit = ">=0.10.1" + +[package.extras] +spelling = ["pyenchant (>=3.2,<4.0)"] +testutils = ["gitpython (>3)"] + +[[package]] +name = "pytest" +version = "9.0.2" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.10" +groups = ["dev"] +files = [ + {file = "pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b"}, + {file = "pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11"}, +] + +[package.dependencies] +colorama = {version = ">=0.4", markers = "sys_platform == \"win32\""} +iniconfig = ">=1.0.1" +packaging = ">=22" +pluggy = ">=1.5,<2" +pygments = ">=2.7.2" + +[package.extras] +dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "requests", "setuptools", "xmlschema"] + +[[package]] +name = "python-discovery" +version = "1.2.1" +description = "Python interpreter discovery" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "python_discovery-1.2.1-py3-none-any.whl", hash = "sha256:b6a957b24c1cd79252484d3566d1b49527581d46e789aaf43181005e56201502"}, + {file = "python_discovery-1.2.1.tar.gz", hash = "sha256:180c4d114bff1c32462537eac5d6a332b768242b76b69c0259c7d14b1b680c9e"}, +] + +[package.dependencies] +filelock = ">=3.15.4" +platformdirs = ">=4.3.6,<5" + +[package.extras] +docs = ["furo (>=2025.12.19)", "sphinx (>=9.1)", "sphinx-autodoc-typehints (>=3.6.3)", "sphinxcontrib-mermaid (>=2)"] +testing = ["covdefaults (>=2.3)", "coverage (>=7.5.4)", "pytest (>=8.3.5)", "pytest-mock (>=3.14)", "setuptools (>=75.1)"] + +[[package]] +name = "pytokens" +version = "0.4.1" +description = "A Fast, spec compliant Python 3.14+ tokenizer that runs on older Pythons." +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "pytokens-0.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2a44ed93ea23415c54f3face3b65ef2b844d96aeb3455b8a69b3df6beab6acc5"}, + {file = "pytokens-0.4.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:add8bf86b71a5d9fb5b89f023a80b791e04fba57960aa790cc6125f7f1d39dfe"}, + {file = "pytokens-0.4.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:670d286910b531c7b7e3c0b453fd8156f250adb140146d234a82219459b9640c"}, + {file = "pytokens-0.4.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:4e691d7f5186bd2842c14813f79f8884bb03f5995f0575272009982c5ac6c0f7"}, + {file = "pytokens-0.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:27b83ad28825978742beef057bfe406ad6ed524b2d28c252c5de7b4a6dd48fa2"}, + {file = "pytokens-0.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d70e77c55ae8380c91c0c18dea05951482e263982911fc7410b1ffd1dadd3440"}, + {file = "pytokens-0.4.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4a58d057208cb9075c144950d789511220b07636dd2e4708d5645d24de666bdc"}, + {file = "pytokens-0.4.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b49750419d300e2b5a3813cf229d4e5a4c728dae470bcc89867a9ad6f25a722d"}, + {file = "pytokens-0.4.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:d9907d61f15bf7261d7e775bd5d7ee4d2930e04424bab1972591918497623a16"}, + {file = "pytokens-0.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:ee44d0f85b803321710f9239f335aafe16553b39106384cef8e6de40cb4ef2f6"}, + {file = "pytokens-0.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:140709331e846b728475786df8aeb27d24f48cbcf7bcd449f8de75cae7a45083"}, + {file = "pytokens-0.4.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6d6c4268598f762bc8e91f5dbf2ab2f61f7b95bdc07953b602db879b3c8c18e1"}, + {file = "pytokens-0.4.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:24afde1f53d95348b5a0eb19488661147285ca4dd7ed752bbc3e1c6242a304d1"}, + {file = "pytokens-0.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5ad948d085ed6c16413eb5fec6b3e02fa00dc29a2534f088d3302c47eb59adf9"}, + {file = "pytokens-0.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:3f901fe783e06e48e8cbdc82d631fca8f118333798193e026a50ce1b3757ea68"}, + {file = "pytokens-0.4.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8bdb9d0ce90cbf99c525e75a2fa415144fd570a1ba987380190e8b786bc6ef9b"}, + {file = "pytokens-0.4.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5502408cab1cb18e128570f8d598981c68a50d0cbd7c61312a90507cd3a1276f"}, + {file = "pytokens-0.4.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:29d1d8fb1030af4d231789959f21821ab6325e463f0503a61d204343c9b355d1"}, + {file = "pytokens-0.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:970b08dd6b86058b6dc07efe9e98414f5102974716232d10f32ff39701e841c4"}, + {file = "pytokens-0.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:9bd7d7f544d362576be74f9d5901a22f317efc20046efe2034dced238cbbfe78"}, + {file = "pytokens-0.4.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:4a14d5f5fc78ce85e426aa159489e2d5961acf0e47575e08f35584009178e321"}, + {file = "pytokens-0.4.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:97f50fd18543be72da51dd505e2ed20d2228c74e0464e4262e4899797803d7fa"}, + {file = "pytokens-0.4.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dc74c035f9bfca0255c1af77ddd2d6ae8419012805453e4b0e7513e17904545d"}, + {file = "pytokens-0.4.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:f66a6bbe741bd431f6d741e617e0f39ec7257ca1f89089593479347cc4d13324"}, + {file = "pytokens-0.4.1-cp314-cp314-win_amd64.whl", hash = "sha256:b35d7e5ad269804f6697727702da3c517bb8a5228afa450ab0fa787732055fc9"}, + {file = "pytokens-0.4.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:8fcb9ba3709ff77e77f1c7022ff11d13553f3c30299a9fe246a166903e9091eb"}, + {file = "pytokens-0.4.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:79fc6b8699564e1f9b521582c35435f1bd32dd06822322ec44afdeba666d8cb3"}, + {file = "pytokens-0.4.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d31b97b3de0f61571a124a00ffe9a81fb9939146c122c11060725bd5aea79975"}, + {file = "pytokens-0.4.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:967cf6e3fd4adf7de8fc73cd3043754ae79c36475c1c11d514fc72cf5490094a"}, + {file = "pytokens-0.4.1-cp314-cp314t-win_amd64.whl", hash = "sha256:584c80c24b078eec1e227079d56dc22ff755e0ba8654d8383b2c549107528918"}, + {file = "pytokens-0.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:da5baeaf7116dced9c6bb76dc31ba04a2dc3695f3d9f74741d7910122b456edc"}, + {file = "pytokens-0.4.1-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:11edda0942da80ff58c4408407616a310adecae1ddd22eef8c692fe266fa5009"}, + {file = "pytokens-0.4.1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0fc71786e629cef478cbf29d7ea1923299181d0699dbe7c3c0f4a583811d9fc1"}, + {file = "pytokens-0.4.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:dcafc12c30dbaf1e2af0490978352e0c4041a7cde31f4f81435c2a5e8b9cabb6"}, + {file = "pytokens-0.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:42f144f3aafa5d92bad964d471a581651e28b24434d184871bd02e3a0d956037"}, + {file = "pytokens-0.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:34bcc734bd2f2d5fe3b34e7b3c0116bfb2397f2d9666139988e7a3eb5f7400e3"}, + {file = "pytokens-0.4.1-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:941d4343bf27b605e9213b26bfa1c4bf197c9c599a9627eb7305b0defcfe40c1"}, + {file = "pytokens-0.4.1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3ad72b851e781478366288743198101e5eb34a414f1d5627cdd585ca3b25f1db"}, + {file = "pytokens-0.4.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:682fa37ff4d8e95f7df6fe6fe6a431e8ed8e788023c6bcc0f0880a12eab80ad1"}, + {file = "pytokens-0.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:30f51edd9bb7f85c748979384165601d028b84f7bd13fe14d3e065304093916a"}, + {file = "pytokens-0.4.1-py3-none-any.whl", hash = "sha256:26cef14744a8385f35d0e095dc8b3a7583f6c953c2e3d269c7f82484bf5ad2de"}, + {file = "pytokens-0.4.1.tar.gz", hash = "sha256:292052fe80923aae2260c073f822ceba21f3872ced9a68bb7953b348e561179a"}, +] + +[package.extras] +dev = ["black", "build", "mypy", "pytest", "pytest-cov", "setuptools", "tox", "twine", "wheel"] + +[[package]] +name = "pyyaml" +version = "6.0.3" +description = "YAML parser and emitter for Python" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "PyYAML-6.0.3-cp38-cp38-macosx_10_13_x86_64.whl", hash = "sha256:c2514fceb77bc5e7a2f7adfaa1feb2fb311607c9cb518dbc378688ec73d8292f"}, + {file = "PyYAML-6.0.3-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9c57bb8c96f6d1808c030b1687b9b5fb476abaa47f0db9c0101f5e9f394e97f4"}, + {file = "PyYAML-6.0.3-cp38-cp38-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:efd7b85f94a6f21e4932043973a7ba2613b059c4a000551892ac9f1d11f5baf3"}, + {file = "PyYAML-6.0.3-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:22ba7cfcad58ef3ecddc7ed1db3409af68d023b7f940da23c6c2a1890976eda6"}, + {file = "PyYAML-6.0.3-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:6344df0d5755a2c9a276d4473ae6b90647e216ab4757f8426893b5dd2ac3f369"}, + {file = "PyYAML-6.0.3-cp38-cp38-win32.whl", hash = "sha256:3ff07ec89bae51176c0549bc4c63aa6202991da2d9a6129d7aef7f1407d3f295"}, + {file = "PyYAML-6.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:5cf4e27da7e3fbed4d6c3d8e797387aaad68102272f8f9752883bc32d61cb87b"}, + {file = "pyyaml-6.0.3-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:214ed4befebe12df36bcc8bc2b64b396ca31be9304b8f59e25c11cf94a4c033b"}, + {file = "pyyaml-6.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:02ea2dfa234451bbb8772601d7b8e426c2bfa197136796224e50e35a78777956"}, + {file = "pyyaml-6.0.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b30236e45cf30d2b8e7b3e85881719e98507abed1011bf463a8fa23e9c3e98a8"}, + {file = "pyyaml-6.0.3-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:66291b10affd76d76f54fad28e22e51719ef9ba22b29e1d7d03d6777a9174198"}, + {file = "pyyaml-6.0.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9c7708761fccb9397fe64bbc0395abcae8c4bf7b0eac081e12b809bf47700d0b"}, + {file = "pyyaml-6.0.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:418cf3f2111bc80e0933b2cd8cd04f286338bb88bdc7bc8e6dd775ebde60b5e0"}, + {file = "pyyaml-6.0.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:5e0b74767e5f8c593e8c9b5912019159ed0533c70051e9cce3e8b6aa699fcd69"}, + {file = "pyyaml-6.0.3-cp310-cp310-win32.whl", hash = "sha256:28c8d926f98f432f88adc23edf2e6d4921ac26fb084b028c733d01868d19007e"}, + {file = "pyyaml-6.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:bdb2c67c6c1390b63c6ff89f210c8fd09d9a1217a465701eac7316313c915e4c"}, + {file = "pyyaml-6.0.3-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:44edc647873928551a01e7a563d7452ccdebee747728c1080d881d68af7b997e"}, + {file = "pyyaml-6.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:652cb6edd41e718550aad172851962662ff2681490a8a711af6a4d288dd96824"}, + {file = "pyyaml-6.0.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:10892704fc220243f5305762e276552a0395f7beb4dbf9b14ec8fd43b57f126c"}, + {file = "pyyaml-6.0.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:850774a7879607d3a6f50d36d04f00ee69e7fc816450e5f7e58d7f17f1ae5c00"}, + {file = "pyyaml-6.0.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b8bb0864c5a28024fac8a632c443c87c5aa6f215c0b126c449ae1a150412f31d"}, + {file = "pyyaml-6.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37d57ad971609cf3c53ba6a7e365e40660e3be0e5175fa9f2365a379d6095a"}, + {file = "pyyaml-6.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:37503bfbfc9d2c40b344d06b2199cf0e96e97957ab1c1b546fd4f87e53e5d3e4"}, + {file = "pyyaml-6.0.3-cp311-cp311-win32.whl", hash = "sha256:8098f252adfa6c80ab48096053f512f2321f0b998f98150cea9bd23d83e1467b"}, + {file = "pyyaml-6.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:9f3bfb4965eb874431221a3ff3fdcddc7e74e3b07799e0e84ca4a0f867d449bf"}, + {file = "pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196"}, + {file = "pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0"}, + {file = "pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28"}, + {file = "pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c"}, + {file = "pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc"}, + {file = "pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e"}, + {file = "pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea"}, + {file = "pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5"}, + {file = "pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b"}, + {file = "pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd"}, + {file = "pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8"}, + {file = "pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1"}, + {file = "pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c"}, + {file = "pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5"}, + {file = "pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6"}, + {file = "pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6"}, + {file = "pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be"}, + {file = "pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26"}, + {file = "pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c"}, + {file = "pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb"}, + {file = "pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac"}, + {file = "pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310"}, + {file = "pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7"}, + {file = "pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788"}, + {file = "pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5"}, + {file = "pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764"}, + {file = "pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35"}, + {file = "pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac"}, + {file = "pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3"}, + {file = "pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3"}, + {file = "pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba"}, + {file = "pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c"}, + {file = "pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702"}, + {file = "pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c"}, + {file = "pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065"}, + {file = "pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65"}, + {file = "pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9"}, + {file = "pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b"}, + {file = "pyyaml-6.0.3-cp39-cp39-macosx_10_13_x86_64.whl", hash = "sha256:b865addae83924361678b652338317d1bd7e79b1f4596f96b96c77a5a34b34da"}, + {file = "pyyaml-6.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c3355370a2c156cffb25e876646f149d5d68f5e0a3ce86a5084dd0b64a994917"}, + {file = "pyyaml-6.0.3-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3c5677e12444c15717b902a5798264fa7909e41153cdf9ef7ad571b704a63dd9"}, + {file = "pyyaml-6.0.3-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5ed875a24292240029e4483f9d4a4b8a1ae08843b9c54f43fcc11e404532a8a5"}, + {file = "pyyaml-6.0.3-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0150219816b6a1fa26fb4699fb7daa9caf09eb1999f3b70fb6e786805e80375a"}, + {file = "pyyaml-6.0.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:fa160448684b4e94d80416c0fa4aac48967a969efe22931448d853ada8baf926"}, + {file = "pyyaml-6.0.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:27c0abcb4a5dac13684a37f76e701e054692a9b2d3064b70f5e4eb54810553d7"}, + {file = "pyyaml-6.0.3-cp39-cp39-win32.whl", hash = "sha256:1ebe39cb5fc479422b83de611d14e2c0d3bb2a18bbcb01f229ab3cfbd8fee7a0"}, + {file = "pyyaml-6.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:2e71d11abed7344e42a8849600193d15b6def118602c4c176f748e4583246007"}, + {file = "pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f"}, +] + +[[package]] +name = "tomlkit" +version = "0.14.0" +description = "Style preserving TOML library" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "tomlkit-0.14.0-py3-none-any.whl", hash = "sha256:592064ed85b40fa213469f81ac584f67a4f2992509a7c3ea2d632208623a3680"}, + {file = "tomlkit-0.14.0.tar.gz", hash = "sha256:cf00efca415dbd57575befb1f6634c4f42d2d87dbba376128adb42c121b87064"}, +] + +[[package]] +name = "virtualenv" +version = "21.2.0" +description = "Virtual Python Environment builder" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "virtualenv-21.2.0-py3-none-any.whl", hash = "sha256:1bd755b504931164a5a496d217c014d098426cddc79363ad66ac78125f9d908f"}, + {file = "virtualenv-21.2.0.tar.gz", hash = "sha256:1720dc3a62ef5b443092e3f499228599045d7fea4c79199770499df8becf9098"}, +] + +[package.dependencies] +distlib = ">=0.3.7,<1" +filelock = {version = ">=3.24.2,<4", markers = "python_version >= \"3.10\""} +platformdirs = ">=3.9.1,<5" +python-discovery = ">=1" + +[metadata] +lock-version = "2.1" +python-versions = ">=3.12" +content-hash = "75265641fd1a3f2a4d608312a3879427b7141ac2a51d0873da5711cbc8ead28e" diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..8d8b711 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,26 @@ +[project] +name = "theseus" +version = "0.1.0" +description = "A philosophical data pipeline analyzing the decay and rebirth of codebases." +authors = [ + {name = "Asif Sayyed",email = "asifdotexe@gmail.com"} +] +license = {text = "AGPL-3.0 license"} +readme = "README.md" +requires-python = ">=3.12" +dependencies = [ +] + +[dependency-groups] +dev = [ + "pytest (==9.0.2)", + "pre-commit (>=4.5.1,<5.0.0)", + "isort (>=8.0.1,<9.0.0)", + "black (>=26.3.1,<27.0.0)", + "pylint (>=4.0.5,<5.0.0)" +] + + +[build-system] +requires = ["poetry-core>=2.0.0,<3.0.0"] +build-backend = "poetry.core.masonry.api" From 6cd6c64f4fc6b961ceb2f0d5c6c5e852ab1b8517 Mon Sep 17 00:00:00 2001 From: Asif Sayyed Date: Sat, 4 Apr 2026 12:10:43 +0530 Subject: [PATCH 02/22] #1 chore: set up precommit yaml file --- .pre-commit-config.yaml | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..44210bb --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,29 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v6.0.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files + + - repo: https://github.com/psf/black + rev: 25.9.0 + hooks: + - id: black + + - repo: https://github.com/PyCQA/isort + rev: 6.1.0 + hooks: + - id: isort + name: isort (python) + + - repo: https://github.com/pylint-dev/pylint + rev: v3.3.8 + hooks: + - id: pylint + name: pylint + entry: poetry run pylint + language: system + types: [python] + args: ["--rcfile=pyproject.toml"] From 63e9422aa588bf2a407a6f470e4c253f38a950cf Mon Sep 17 00:00:00 2001 From: Asif Sayyed Date: Sat, 4 Apr 2026 12:19:35 +0530 Subject: [PATCH 03/22] #1 feat: add internal function for running cmd --- scripts/analyse_repository.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 scripts/analyse_repository.py diff --git a/scripts/analyse_repository.py b/scripts/analyse_repository.py new file mode 100644 index 0000000..549bfdc --- /dev/null +++ b/scripts/analyse_repository.py @@ -0,0 +1,30 @@ +""" +This script is reposible for doing the heavy lifting. +Collects the 12 blames per year for target repository +""" + +import subprocess + + +def _run_command(cmd: list[str], cwd: str) -> str: + """ + Execute a shell command and return it's standard output + + :param cmd: List of arguments forming the command. + :param cwd: Directory path where the command should be executed. + :return: Decoded standard output of the command. + """ + try: + result = subprocess.run( + cmd, + cwd=cwd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + check=True, + ) + return result.stdout.strip() + except subprocess.CalledProcessError as e: + raise RuntimeError( + f"Command '{' '.join(cmd)}' failed with exit code {e.returncode}" + ) from e From f6af97836abcfe02562047111f1ce6c65831f195 Mon Sep 17 00:00:00 2001 From: Asif Sayyed Date: Sat, 4 Apr 2026 12:38:03 +0530 Subject: [PATCH 04/22] #1 chore: update max line limit --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 8d8b711..af76f56 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,8 @@ dev = [ "pylint (>=4.0.5,<5.0.0)" ] +[tool.pylint.format] +max-line-length = 120 [build-system] requires = ["poetry-core>=2.0.0,<3.0.0"] From b06d9094934c27c8024a43f25b516a3dcabb8978 Mon Sep 17 00:00:00 2001 From: Asif Sayyed Date: Sat, 4 Apr 2026 12:38:26 +0530 Subject: [PATCH 05/22] #1 feat: add function to capture monthly snapshot --- scripts/analyse_repository.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/scripts/analyse_repository.py b/scripts/analyse_repository.py index 549bfdc..6c1f558 100644 --- a/scripts/analyse_repository.py +++ b/scripts/analyse_repository.py @@ -28,3 +28,32 @@ def _run_command(cmd: list[str], cwd: str) -> str: raise RuntimeError( f"Command '{' '.join(cmd)}' failed with exit code {e.returncode}" ) from e + + +def get_monthly_snapshots(repo_path: str) -> list[tuple[str, str]]: + """ + Identify one commit per month to act as a historical snapshot. + + :param repo_path: Path to the git repository. + :return: A list of tuples, each containing a 'YYYY-MM' period and the corresponding commit hash. + i.e., [(period, commit_hash), ...] + """ + log_output = _run_command( + cmd=["git", "log", "--pretty=format:%H|%cI"], cwd=repo_path + ) + + snapshot: dict = {} + for line in log_output.splitlines(): + if not line: + continue + commit_hash, commit_date = line.split("|") + + # We slice the first 7 characters of the ISO to get the 'YYYY-MM' period + period = commit_date[:7] + + # Git log outputs newest commit first. By assigning to the dictionary, + # the last commit processed for a month overwrites earlier ones, + # leaving us with the very first commit of that specific month + snapshot[period] = commit_hash + + return sorted(snapshot.items(), key=lambda x: x[0]) From 7111ba547efd1c691972223526d3d736d1fec935 Mon Sep 17 00:00:00 2001 From: Asif Sayyed Date: Sat, 4 Apr 2026 13:00:27 +0530 Subject: [PATCH 06/22] #1 feat: add func to analyse snapshot --- scripts/analyse_repository.py | 42 +++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/scripts/analyse_repository.py b/scripts/analyse_repository.py index 6c1f558..605d354 100644 --- a/scripts/analyse_repository.py +++ b/scripts/analyse_repository.py @@ -3,7 +3,10 @@ Collects the 12 blames per year for target repository """ +import os import subprocess +from collections import defaultdict +from datetime import datetime def _run_command(cmd: list[str], cwd: str) -> str: @@ -57,3 +60,42 @@ def get_monthly_snapshots(repo_path: str) -> list[tuple[str, str]]: snapshot[period] = commit_hash return sorted(snapshot.items(), key=lambda x: x[0]) + + +# TODO: Optimisation opportunity: use git blame directly on the file list or if the file is larger than 32KB, +# batch it into chunks as that the N would be reduced +def analyze_snapshots(repo_path: str, commit: str) -> dict[str, int]: + """ + Analyze the snapshots collected from the repository. + """ + # We don't store the return value here because we only care about the side effect. + # This command physically alters the file in the directory to match the commit's history + # If the command fails, _run_command will raise an exception and halt execution automatically + _run_command(cmd=["git", "checkout", commit], cwd=repo_path) + + files_output = _run_command(cmd=["git", "ls-files"], cwd=repo_path) + files = files_output.splitlines() + + age_distribution = defaultdict(int) + + for file in files: + file_path = os.path.join(repo_path, file) + if os.path.exists(file_path): + continue + + try: + # '--line-porcelain' provides machine-readable output of blame, including author-time + blame_output = _run_command( + cmd=["git", "blame", "--line-porcelain", file], cwd=repo_path + ) + for line in blame_output.splitlines(): + if line.startswith("author-time "): + timestamp = int(line.split(" ")[1]) + birth_year = datetime.fromtimestamp(timestamp).strftime("%Y") + age_distribution[birth_year] += 1 + break + except RuntimeError: + # Skip files that git blame cannot process (e.g., binary files) + continue + + return dict(age_distribution) From b124a3954c670d930c2c91c57aa966a827587200 Mon Sep 17 00:00:00 2001 From: Asif Sayyed Date: Sat, 4 Apr 2026 13:10:16 +0530 Subject: [PATCH 07/22] #1 chore: ignore fixme lint warning --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index af76f56..3690a90 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,6 +23,9 @@ dev = [ [tool.pylint.format] max-line-length = 120 +[tool.pylint.messages_control] +disable = ["fixme"] + [build-system] requires = ["poetry-core>=2.0.0,<3.0.0"] build-backend = "poetry.core.masonry.api" From 72264090aa20cbf3c63eef3d7402231e15d3938c Mon Sep 17 00:00:00 2001 From: Asif Sayyed Date: Sat, 4 Apr 2026 13:10:39 +0530 Subject: [PATCH 08/22] #1 feat: add func to prevent redundant blame calc --- scripts/analyse_repository.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/scripts/analyse_repository.py b/scripts/analyse_repository.py index 605d354..22f9a00 100644 --- a/scripts/analyse_repository.py +++ b/scripts/analyse_repository.py @@ -3,6 +3,7 @@ Collects the 12 blames per year for target repository """ +import json import os import subprocess from collections import defaultdict @@ -62,7 +63,7 @@ def get_monthly_snapshots(repo_path: str) -> list[tuple[str, str]]: return sorted(snapshot.items(), key=lambda x: x[0]) -# TODO: Optimisation opportunity: use git blame directly on the file list or if the file is larger than 32KB, +# FIXME: Optimisation opportunity: use git blame directly on the file list or if the file is larger than 32KB, # batch it into chunks as that the N would be reduced def analyze_snapshots(repo_path: str, commit: str) -> dict[str, int]: """ @@ -99,3 +100,20 @@ def analyze_snapshots(repo_path: str, commit: str) -> dict[str, int]: continue return dict(age_distribution) + + +def load_existing_state(json_fname: str) -> list[dict]: + """ + Load the existing historical data to prevent redundant re-calculations. + + :param json_fname: Path to the existing JSON file containing the historical data. + :return: A list of dictionaries with the historical data. + """ + if os.path.exists(json_fname): + try: + with open(json_fname, "r", encoding="utf-8") as f: + return json.load(f) + except json.JSONDecodeError: + print(f"Warning: {json_fname} is corrupted, Start fresh.") + return [] + return [] From 003b27ac23ccb2a8278d83e1475fea9246067a6f Mon Sep 17 00:00:00 2001 From: Asif Sayyed Date: Sat, 4 Apr 2026 14:08:29 +0530 Subject: [PATCH 09/22] #1 feat: implement a main func --- data/theseus_data.json | 102 ++++++++++++++++++++++++++++++++++ scripts/analyse_repository.py | 74 +++++++++++++++++++++++- 2 files changed, 175 insertions(+), 1 deletion(-) create mode 100644 data/theseus_data.json diff --git a/data/theseus_data.json b/data/theseus_data.json new file mode 100644 index 0000000..a317d9c --- /dev/null +++ b/data/theseus_data.json @@ -0,0 +1,102 @@ +[ + { + "snapshot_date": "2023-08", + "total_lines": 0, + "composition": {} + }, + { + "snapshot_date": "2023-09", + "total_lines": 0, + "composition": {} + }, + { + "snapshot_date": "2023-11", + "total_lines": 0, + "composition": {} + }, + { + "snapshot_date": "2023-12", + "total_lines": 0, + "composition": {} + }, + { + "snapshot_date": "2024-07", + "total_lines": 0, + "composition": {} + }, + { + "snapshot_date": "2024-08", + "total_lines": 0, + "composition": {} + }, + { + "snapshot_date": "2024-10", + "total_lines": 0, + "composition": {} + }, + { + "snapshot_date": "2024-12", + "total_lines": 0, + "composition": {} + }, + { + "snapshot_date": "2025-01", + "total_lines": 0, + "composition": {} + }, + { + "snapshot_date": "2025-02", + "total_lines": 0, + "composition": {} + }, + { + "snapshot_date": "2025-05", + "total_lines": 0, + "composition": {} + }, + { + "snapshot_date": "2025-07", + "total_lines": 0, + "composition": {} + }, + { + "snapshot_date": "2025-08", + "total_lines": 0, + "composition": {} + }, + { + "snapshot_date": "2025-09", + "total_lines": 0, + "composition": {} + }, + { + "snapshot_date": "2025-10", + "total_lines": 0, + "composition": {} + }, + { + "snapshot_date": "2025-11", + "total_lines": 0, + "composition": {} + }, + { + "snapshot_date": "2026-01", + "total_lines": 0, + "composition": {} + }, + { + "snapshot_date": "2026-02", + "total_lines": 0, + "composition": {} + }, + { + "snapshot_date": "2026-03", + "total_lines": 0, + "composition": {} + }, + { + "snapshot_date": "2026-04", + "total_lines": 0, + "composition": {} + } +] \ No newline at end of file diff --git a/scripts/analyse_repository.py b/scripts/analyse_repository.py index 22f9a00..45ddd93 100644 --- a/scripts/analyse_repository.py +++ b/scripts/analyse_repository.py @@ -9,6 +9,8 @@ from collections import defaultdict from datetime import datetime +# FIXME: The data returns 0 lines of code and no composition + def _run_command(cmd: list[str], cwd: str) -> str: """ @@ -64,7 +66,7 @@ def get_monthly_snapshots(repo_path: str) -> list[tuple[str, str]]: # FIXME: Optimisation opportunity: use git blame directly on the file list or if the file is larger than 32KB, -# batch it into chunks as that the N would be reduced +# batch it into chunks as that the N would be reduced def analyze_snapshots(repo_path: str, commit: str) -> dict[str, int]: """ Analyze the snapshots collected from the repository. @@ -117,3 +119,73 @@ def load_existing_state(json_fname: str) -> list[dict]: print(f"Warning: {json_fname} is corrupted, Start fresh.") return [] return [] + + +# TODO: Make the main function to tie everything together +def generate_theseus_data(target_repo_path: str, output_json_fname: str) -> None: + """ + Orchestrate the extract of Ship of Theseus code persistence data + using an incremental load strategy by just processing the delta + """ + # System design thinking is that we don't want to load existing state and recalculate redundantly + # + # Let's say we have a 10-year old repository. Running git blame on every file for every single month + # of it's 120 month long history would take hours and it would blow past the GitHub Action's free tier limit. + # By loading the existing state and only processing the delta, we can avoid this and run much faster. + # + # This reduces a 30-minute monthly compute job down to about 5 seconds, + # ensuring that I don't have to pay for keeping this project alive lmao. + historical_data = load_existing_state(output_json_fname) + processed_periods = set(item["snapshot_date"] for item in historical_data) + + all_snapshots = get_monthly_snapshots(target_repo_path) + new_snapshots = [] + + for period, commit in all_snapshots: + if period in processed_periods: + # We already know that the repository looked like this month. Skip it. + continue + + print(f"Calculating DELTA for new period: {period} (Commit: {commit[:7]})...") + distribution = analyze_snapshots(target_repo_path, commit) + + new_snapshots.append( + { + "snapshot_date": period, + "total_lines": sum(distribution.values()), + "composition": distribution, + } + ) + + if not new_snapshots: + print("No new months to process. Repository data is already up-to-date.") + return + + # Combine the historical data with the newly processed delta + final_dataset = historical_data + new_snapshots + + # Ensure chronological order to prevent rendering glitches on the frontend + final_dataset.sort(key=lambda x: x["snapshot_date"]) + + # Polite cleanup: return the repo to its intial state + _run_command(["git", "checkout", "-"], cwd=target_repo_path) + + with open(output_json_fname, "w", encoding="utf-8") as f: + json.dump(final_dataset, f, indent=4) + + print( + f"Delta analysis completed. Appended {len(new_snapshots)} new months to the dataset." + ) + + +# FIXME: Make this into argparse or list for scalability +if __name__ == "__main__": + TARGET_REPO_PATH = "C:\\Users\\sayye\\OneDrive\\Documents\\GitHub\\portfolio" + OUTPUT_JSON_FNAME = "./data/theseus_data.json" + + os.makedirs(os.path.dirname(OUTPUT_JSON_FNAME), exist_ok=True) + + if os.path.exists(TARGET_REPO_PATH): + generate_theseus_data(TARGET_REPO_PATH, OUTPUT_JSON_FNAME) + else: + print(f"Target repository not found: {TARGET_REPO_PATH}") From 8affcc4119cf2e8733ffeb98b683109209a2859b Mon Sep 17 00:00:00 2001 From: Asif Sayyed Date: Sat, 4 Apr 2026 14:51:59 +0530 Subject: [PATCH 10/22] #1 fix: git blame command and re-run it --- data/theseus_data.json | 143 ++++++++++++++++++++++++---------- scripts/analyse_repository.py | 72 +++++++++++------ 2 files changed, 148 insertions(+), 67 deletions(-) diff --git a/data/theseus_data.json b/data/theseus_data.json index a317d9c..98e8f53 100644 --- a/data/theseus_data.json +++ b/data/theseus_data.json @@ -1,102 +1,159 @@ [ { "snapshot_date": "2023-08", - "total_lines": 0, - "composition": {} + "total_lines": 2, + "composition": { + "2023": 2 + } }, { "snapshot_date": "2023-09", - "total_lines": 0, - "composition": {} + "total_lines": 120887, + "composition": { + "2023": 120887 + } }, { "snapshot_date": "2023-11", - "total_lines": 0, - "composition": {} + "total_lines": 123485, + "composition": { + "2023": 123485 + } }, { "snapshot_date": "2023-12", - "total_lines": 0, - "composition": {} + "total_lines": 130130, + "composition": { + "2023": 130130 + } }, { "snapshot_date": "2024-07", - "total_lines": 0, - "composition": {} + "total_lines": 8678, + "composition": { + "2024": 8677, + "2023": 1 + } }, { "snapshot_date": "2024-08", - "total_lines": 0, - "composition": {} + "total_lines": 56103, + "composition": { + "2024": 56102, + "2023": 1 + } }, { "snapshot_date": "2024-10", - "total_lines": 0, - "composition": {} + "total_lines": 56127, + "composition": { + "2024": 56126, + "2023": 1 + } }, { "snapshot_date": "2024-12", - "total_lines": 0, - "composition": {} + "total_lines": 63804, + "composition": { + "2024": 63803, + "2023": 1 + } }, { "snapshot_date": "2025-01", - "total_lines": 0, - "composition": {} + "total_lines": 72583, + "composition": { + "2024": 72566, + "2025": 16, + "2023": 1 + } }, { "snapshot_date": "2025-02", - "total_lines": 0, - "composition": {} + "total_lines": 84875, + "composition": { + "2025": 13099, + "2024": 71775, + "2023": 1 + } }, { "snapshot_date": "2025-05", - "total_lines": 0, - "composition": {} + "total_lines": 84868, + "composition": { + "2025": 13115, + "2024": 71752, + "2023": 1 + } }, { "snapshot_date": "2025-07", - "total_lines": 0, - "composition": {} + "total_lines": 84822, + "composition": { + "2025": 13176, + "2024": 71645, + "2023": 1 + } }, { "snapshot_date": "2025-08", - "total_lines": 0, - "composition": {} + "total_lines": 106203, + "composition": { + "2025": 35598, + "2024": 70604, + "2023": 1 + } }, { "snapshot_date": "2025-09", - "total_lines": 0, - "composition": {} + "total_lines": 120671, + "composition": { + "2025": 50150, + "2024": 70520, + "2023": 1 + } }, { "snapshot_date": "2025-10", - "total_lines": 0, - "composition": {} + "total_lines": 117588, + "composition": { + "2025": 50371, + "2024": 67217 + } }, { "snapshot_date": "2025-11", - "total_lines": 0, - "composition": {} + "total_lines": 122177, + "composition": { + "2025": 54960, + "2024": 67217 + } }, { "snapshot_date": "2026-01", - "total_lines": 0, - "composition": {} + "total_lines": 123609, + "composition": { + "2025": 56591, + "2024": 67008, + "2026": 10 + } }, { "snapshot_date": "2026-02", - "total_lines": 0, - "composition": {} + "total_lines": 124276, + "composition": { + "2026": 1146, + "2025": 56277, + "2024": 66853 + } }, { "snapshot_date": "2026-03", - "total_lines": 0, - "composition": {} - }, - { - "snapshot_date": "2026-04", - "total_lines": 0, - "composition": {} + "total_lines": 10371, + "composition": { + "2026": 7261, + "2025": 1225, + "2024": 1885 + } } ] \ No newline at end of file diff --git a/scripts/analyse_repository.py b/scripts/analyse_repository.py index 45ddd93..d3010a0 100644 --- a/scripts/analyse_repository.py +++ b/scripts/analyse_repository.py @@ -9,8 +9,6 @@ from collections import defaultdict from datetime import datetime -# FIXME: The data returns 0 lines of code and no composition - def _run_command(cmd: list[str], cwd: str) -> str: """ @@ -27,6 +25,8 @@ def _run_command(cmd: list[str], cwd: str) -> str: stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, + encoding="utf-8", + errors="replace", check=True, ) return result.stdout.strip() @@ -48,7 +48,7 @@ def get_monthly_snapshots(repo_path: str) -> list[tuple[str, str]]: cmd=["git", "log", "--pretty=format:%H|%cI"], cwd=repo_path ) - snapshot: dict = {} + snapshots: dict = {} for line in log_output.splitlines(): if not line: continue @@ -60,43 +60,66 @@ def get_monthly_snapshots(repo_path: str) -> list[tuple[str, str]]: # Git log outputs newest commit first. By assigning to the dictionary, # the last commit processed for a month overwrites earlier ones, # leaving us with the very first commit of that specific month - snapshot[period] = commit_hash + snapshots[period] = commit_hash - return sorted(snapshot.items(), key=lambda x: x[0]) + return sorted(snapshots.items(), key=lambda x: x[0]) # FIXME: Optimisation opportunity: use git blame directly on the file list or if the file is larger than 32KB, # batch it into chunks as that the N would be reduced -def analyze_snapshots(repo_path: str, commit: str) -> dict[str, int]: +def analyze_snapshots(repo_path: str, commit_hash: str) -> dict[str, int]: """ Analyze the snapshots collected from the repository. - """ - # We don't store the return value here because we only care about the side effect. - # This command physically alters the file in the directory to match the commit's history - # If the command fails, _run_command will raise an exception and halt execution automatically - _run_command(cmd=["git", "checkout", commit], cwd=repo_path) - files_output = _run_command(cmd=["git", "ls-files"], cwd=repo_path) - files = files_output.splitlines() + :param repo_path: Path to the repository + :param commit_hash: Hash of the commit to analyze + :return: Dictionary of file age distribution in months + """ + try: + files_output = _run_command( + cmd=["git", "ls-tree", "-r", "-z", "--name-only", commit_hash], + cwd=repo_path, + ) + except RuntimeError as e: + print(f"Failed to list files in repository: {str(e)}") + return {} + # Split by the null character (because of -z) to handle spaces accurately + files = [f for f in files_output.split("\0") if f] age_distribution = defaultdict(int) for file in files: - file_path = os.path.join(repo_path, file) - if os.path.exists(file_path): - continue - try: - # '--line-porcelain' provides machine-readable output of blame, including author-time + # Blame the file directly at the specific commit in history + # The '--' ensure git doesn;t confuse filename with flas blame_output = _run_command( - cmd=["git", "blame", "--line-porcelain", file], cwd=repo_path + ["git", "blame", "--line-porcelain", commit_hash, "--", file], + cwd=repo_path, ) + + commit_to_year = {} + current_commit = None + + # A robust state machine to parse Git porcelain format + # Porcelain format only prints the 'author-time' once per commit block + # so we must remember the year for each commit has we encounter. for line in blame_output.splitlines(): - if line.startswith("author-time "): - timestamp = int(line.split(" ")[1]) - birth_year = datetime.fromtimestamp(timestamp).strftime("%Y") - age_distribution[birth_year] += 1 - break + if line.startswith("\t"): + if current_commit in commit_to_year: + age_distribution[commit_to_year[current_commit]] += 1 + else: + parts = line.split(" ") + # A 40 (or 64 for SHA-256) character hash marks the start of a new blame block + if len(parts[0]) in (40, 64): + current_commit = parts[0] + elif parts[0] == "author-time": + try: + timestamp = int(parts[1]) + commit_to_year[current_commit] = datetime.fromtimestamp( + timestamp + ).strftime("%Y") + except (ValueError, OverflowError, IndexError): + pass except RuntimeError: # Skip files that git blame cannot process (e.g., binary files) continue @@ -127,6 +150,7 @@ def generate_theseus_data(target_repo_path: str, output_json_fname: str) -> None Orchestrate the extract of Ship of Theseus code persistence data using an incremental load strategy by just processing the delta """ + print(f"Starting analysis on {target_repo_path}...") # System design thinking is that we don't want to load existing state and recalculate redundantly # # Let's say we have a 10-year old repository. Running git blame on every file for every single month From b671bb4832d4732bdf099895222f7a3c4b3fd4fd Mon Sep 17 00:00:00 2001 From: Asif Sayyed Date: Sat, 4 Apr 2026 15:14:47 +0530 Subject: [PATCH 11/22] #1 feat: add dynamic repo cloning and processing --- data/theseus_data.json | 159 ---------------------------------- scripts/analyse_repository.py | 143 +++++++++++++++++++----------- 2 files changed, 92 insertions(+), 210 deletions(-) delete mode 100644 data/theseus_data.json diff --git a/data/theseus_data.json b/data/theseus_data.json deleted file mode 100644 index 98e8f53..0000000 --- a/data/theseus_data.json +++ /dev/null @@ -1,159 +0,0 @@ -[ - { - "snapshot_date": "2023-08", - "total_lines": 2, - "composition": { - "2023": 2 - } - }, - { - "snapshot_date": "2023-09", - "total_lines": 120887, - "composition": { - "2023": 120887 - } - }, - { - "snapshot_date": "2023-11", - "total_lines": 123485, - "composition": { - "2023": 123485 - } - }, - { - "snapshot_date": "2023-12", - "total_lines": 130130, - "composition": { - "2023": 130130 - } - }, - { - "snapshot_date": "2024-07", - "total_lines": 8678, - "composition": { - "2024": 8677, - "2023": 1 - } - }, - { - "snapshot_date": "2024-08", - "total_lines": 56103, - "composition": { - "2024": 56102, - "2023": 1 - } - }, - { - "snapshot_date": "2024-10", - "total_lines": 56127, - "composition": { - "2024": 56126, - "2023": 1 - } - }, - { - "snapshot_date": "2024-12", - "total_lines": 63804, - "composition": { - "2024": 63803, - "2023": 1 - } - }, - { - "snapshot_date": "2025-01", - "total_lines": 72583, - "composition": { - "2024": 72566, - "2025": 16, - "2023": 1 - } - }, - { - "snapshot_date": "2025-02", - "total_lines": 84875, - "composition": { - "2025": 13099, - "2024": 71775, - "2023": 1 - } - }, - { - "snapshot_date": "2025-05", - "total_lines": 84868, - "composition": { - "2025": 13115, - "2024": 71752, - "2023": 1 - } - }, - { - "snapshot_date": "2025-07", - "total_lines": 84822, - "composition": { - "2025": 13176, - "2024": 71645, - "2023": 1 - } - }, - { - "snapshot_date": "2025-08", - "total_lines": 106203, - "composition": { - "2025": 35598, - "2024": 70604, - "2023": 1 - } - }, - { - "snapshot_date": "2025-09", - "total_lines": 120671, - "composition": { - "2025": 50150, - "2024": 70520, - "2023": 1 - } - }, - { - "snapshot_date": "2025-10", - "total_lines": 117588, - "composition": { - "2025": 50371, - "2024": 67217 - } - }, - { - "snapshot_date": "2025-11", - "total_lines": 122177, - "composition": { - "2025": 54960, - "2024": 67217 - } - }, - { - "snapshot_date": "2026-01", - "total_lines": 123609, - "composition": { - "2025": 56591, - "2024": 67008, - "2026": 10 - } - }, - { - "snapshot_date": "2026-02", - "total_lines": 124276, - "composition": { - "2026": 1146, - "2025": 56277, - "2024": 66853 - } - }, - { - "snapshot_date": "2026-03", - "total_lines": 10371, - "composition": { - "2026": 7261, - "2025": 1225, - "2024": 1885 - } - } -] \ No newline at end of file diff --git a/scripts/analyse_repository.py b/scripts/analyse_repository.py index d3010a0..3e4d56f 100644 --- a/scripts/analyse_repository.py +++ b/scripts/analyse_repository.py @@ -5,12 +5,13 @@ import json import os +import shutil import subprocess from collections import defaultdict from datetime import datetime -def _run_command(cmd: list[str], cwd: str) -> str: +def _run_command(cmd: list[str], cwd: str = None) -> str: """ Execute a shell command and return it's standard output @@ -36,6 +37,18 @@ def _run_command(cmd: list[str], cwd: str) -> str: ) from e +def clone_repository(repo_slug: str, clone_dir: str) -> None: + """ + Dynamically clone a GitHub repository given its owner/name slug. + + :param repo_slug: The GitHub repository identifier (e.g., 'facebook/react'). + :param clone_dir: The local directory where the repository should be cloned. + """ + print(f"Cloning {repo_slug} into {clone_dir}...") + repo_url = f"https://github.com/{repo_slug}.git" + _run_command(["git", "clone", repo_url, clone_dir]) + + def get_monthly_snapshots(repo_path: str) -> list[tuple[str, str]]: """ Identify one commit per month to act as a historical snapshot. @@ -145,71 +158,99 @@ def load_existing_state(json_fname: str) -> list[dict]: # TODO: Make the main function to tie everything together -def generate_theseus_data(target_repo_path: str, output_json_fname: str) -> None: +def process_repository(repo_slug: str, data_dir: str) -> None: """ - Orchestrate the extract of Ship of Theseus code persistence data + Orchestrate the extraction of Ship of Theseus code persistence data using an incremental load strategy by just processing the delta + + :param repo_slug: The GitHub repository identifier (e.g., 'facebook/react'). + :param data_dir: Path where the resulting JSON data will be saved. """ - print(f"Starting analysis on {target_repo_path}...") + repo_name = repo_slug.split("/")[-1] + temp_repo_path = f"./temp_workdir_{repo_name}" + output_json_path = os.path.join(data_dir, f"{repo_name}_data.json") # System design thinking is that we don't want to load existing state and recalculate redundantly # + # We clone the repository dynamically just to read it. By pulling the codebase + # ourselves instead of relying on GitHub Actions checkout steps, we can iterate + # through 10, 50, or 100 repositories entirely within Python. + # # Let's say we have a 10-year old repository. Running git blame on every file for every single month # of it's 120 month long history would take hours and it would blow past the GitHub Action's free tier limit. # By loading the existing state and only processing the delta, we can avoid this and run much faster. # # This reduces a 30-minute monthly compute job down to about 5 seconds, # ensuring that I don't have to pay for keeping this project alive lmao. - historical_data = load_existing_state(output_json_fname) - processed_periods = set(item["snapshot_date"] for item in historical_data) - - all_snapshots = get_monthly_snapshots(target_repo_path) - new_snapshots = [] - - for period, commit in all_snapshots: - if period in processed_periods: - # We already know that the repository looked like this month. Skip it. - continue - - print(f"Calculating DELTA for new period: {period} (Commit: {commit[:7]})...") - distribution = analyze_snapshots(target_repo_path, commit) - - new_snapshots.append( - { - "snapshot_date": period, - "total_lines": sum(distribution.values()), - "composition": distribution, - } - ) - - if not new_snapshots: - print("No new months to process. Repository data is already up-to-date.") - return - - # Combine the historical data with the newly processed delta - final_dataset = historical_data + new_snapshots + try: + if not os.path.exists(temp_repo_path): + clone_repository(repo_slug, temp_repo_path) + else: + print(f"Repository {repo_name} already exists locally. Fetching latest...") + _run_command(["git", "fetch", "--all"], cwd=temp_repo_path) + _run_command(["git", "checkout", "main"], cwd=temp_repo_path) + _run_command(["git", "pull"], cwd=temp_repo_path) + + historical_data = load_existing_state(output_json_path) + processed_periods = set(item["snapshot_date"] for item in historical_data) + + all_snapshots = get_monthly_snapshots(temp_repo_path) + new_data = [] + + for period, commit in all_snapshots: + if period in processed_periods: + # We already know what the repository looked like in this month. Skip it. + continue + + print( + f"[{repo_name}] Calculating DELTA for new period: {period} (Commit: {commit[:7]})..." + ) + distribution = analyze_snapshots(temp_repo_path, commit) + + new_data.append( + { + "snapshot_date": period, + "total_lines": sum(distribution.values()), + "composition": distribution, + } + ) - # Ensure chronological order to prevent rendering glitches on the frontend - final_dataset.sort(key=lambda x: x["snapshot_date"]) + if not new_data: + print( + f"[{repo_name}] No new months to process. Data is already up to date!" + ) + else: + final_dataset = historical_data + new_data + final_dataset.sort(key=lambda x: x["snapshot_date"]) - # Polite cleanup: return the repo to its intial state - _run_command(["git", "checkout", "-"], cwd=target_repo_path) + with open(output_json_path, "w") as f: + json.dump(final_dataset, f, indent=4) - with open(output_json_fname, "w", encoding="utf-8") as f: - json.dump(final_dataset, f, indent=4) + print( + f"[{repo_name}] Delta analysis complete. Appended {len(new_data)} new months." + ) - print( - f"Delta analysis completed. Appended {len(new_snapshots)} new months to the dataset." - ) + finally: + # Polite cleanup: Remove the gigantic source code folders we downloaded. + # We only want to keep the JSON data! + if os.path.exists(temp_repo_path): + print(f"Cleaning up temporary directory: {temp_repo_path}") + # Note: Windows might need special handling for git files, but this works on Linux/Mac (GitHub Actions) + shutil.rmtree(temp_repo_path, ignore_errors=True) -# FIXME: Make this into argparse or list for scalability if __name__ == "__main__": - TARGET_REPO_PATH = "C:\\Users\\sayye\\OneDrive\\Documents\\GitHub\\portfolio" - OUTPUT_JSON_FNAME = "./data/theseus_data.json" - - os.makedirs(os.path.dirname(OUTPUT_JSON_FNAME), exist_ok=True) - - if os.path.exists(TARGET_REPO_PATH): - generate_theseus_data(TARGET_REPO_PATH, OUTPUT_JSON_FNAME) - else: - print(f"Target repository not found: {TARGET_REPO_PATH}") + DATA_OUTPUT_DIR = "./data" + os.makedirs(DATA_OUTPUT_DIR, exist_ok=True) + + # --------------------------------------------------------- + # The Case Studies: Add any public repository you want here! + # --------------------------------------------------------- + TARGETS = [ + "facebook/react", # The modern web standard + "vuejs/vue", # A fantastic comparison to React + "d3/d3", # The data-viz giant + ] + + for target in TARGETS: + print(f"\n{'=' * 50}\nStarting analysis pipeline for: {target}\n{'=' * 50}") + process_repository(target, DATA_OUTPUT_DIR) From 181ec70c188631e3bc049844aa81db24b113dbc8 Mon Sep 17 00:00:00 2001 From: Asif Sayyed Date: Sat, 4 Apr 2026 15:26:56 +0530 Subject: [PATCH 12/22] #1 fix(style): extracted logic into _parse_blame_output and added timer --- scripts/analyse_repository.py | 125 ++++++++++++++++++++++------------ 1 file changed, 81 insertions(+), 44 deletions(-) diff --git a/scripts/analyse_repository.py b/scripts/analyse_repository.py index 3e4d56f..90316fb 100644 --- a/scripts/analyse_repository.py +++ b/scripts/analyse_repository.py @@ -7,11 +7,31 @@ import os import shutil import subprocess +import time from collections import defaultdict from datetime import datetime +from functools import wraps -def _run_command(cmd: list[str], cwd: str = None) -> str: +def timer(func): + """ + A decorator that prints the execution time of the function it wraps. + Used for benchmarking sequential vs. concurrent execution optimizations. + """ + + @wraps(func) + def wrapper(*args, **kwargs): + start_time = time.perf_counter() + result = func(*args, **kwargs) + end_time = time.perf_counter() + execution_time = end_time - start_time + print(f"⏱️ [TIMER] '{func.__name__}' executed in {execution_time:.4f} seconds") + return result + + return wrapper + + +def _run_command(cmd: list[str], cwd: str | None = None) -> str: """ Execute a shell command and return it's standard output @@ -78,63 +98,76 @@ def get_monthly_snapshots(repo_path: str) -> list[tuple[str, str]]: return sorted(snapshots.items(), key=lambda x: x[0]) +def _parse_blame_output(blame_output: str) -> dict[str, int]: + """ + Parse git blame --line-porcelain output, returning a year -> line count mapping. + Extracting this logic reduces nesting and properly handles Git's porcelain format, + where 'author-time' is only printed once per commit block, but actual code lines + begin with a tab character. + + :param blame_output: The raw output from git blame --line-porcelain + :return: A dictionary mapping years to the number of lines changed in that year + """ + file_distribution = defaultdict(int) + commit_to_year = {} + current_commit = None + + for line in blame_output.splitlines(): + if line.startswith("\t"): + # This is an actual line of code. Attribute it to the year of the current commit. + if current_commit and current_commit in commit_to_year: + year = commit_to_year[current_commit] + file_distribution[year] += 1 + else: + parts = line.split(" ") + # Check if the line starts with a 40-char (SHA-1) or 64-char (SHA-256) commit hash + if len(parts[0]) in (40, 64): + current_commit = parts[0] + elif parts[0] == "author-time": + try: + timestamp = int(parts[1]) + year = datetime.fromtimestamp(timestamp).strftime("%Y") + commit_to_year[current_commit] = year + except (ValueError, IndexError): + pass + + return dict(file_distribution) + + # FIXME: Optimisation opportunity: use git blame directly on the file list or if the file is larger than 32KB, # batch it into chunks as that the N would be reduced +@timer def analyze_snapshots(repo_path: str, commit_hash: str) -> dict[str, int]: """ Analyze the snapshots collected from the repository. :param repo_path: Path to the repository :param commit_hash: Hash of the commit to analyze - :return: Dictionary of file age distribution in months + :return: Dictionary mapping birth year to line count """ - try: - files_output = _run_command( - cmd=["git", "ls-tree", "-r", "-z", "--name-only", commit_hash], - cwd=repo_path, - ) - except RuntimeError as e: - print(f"Failed to list files in repository: {str(e)}") - return {} + _run_command(["git", "checkout", commit_hash], cwd=repo_path) + files_output = _run_command(["git", "ls-files"], cwd=repo_path) + files = files_output.splitlines() - # Split by the null character (because of -z) to handle spaces accurately - files = [f for f in files_output.split("\0") if f] age_distribution = defaultdict(int) for file in files: + file_path = os.path.join(repo_path, file) + if not os.path.isfile(file_path): + continue + try: - # Blame the file directly at the specific commit in history - # The '--' ensure git doesn;t confuse filename with flas + # --line-porcelain provides machine-readable output including author-time blame_output = _run_command( - ["git", "blame", "--line-porcelain", commit_hash, "--", file], - cwd=repo_path, + ["git", "blame", "--line-porcelain", file], cwd=repo_path ) + file_dist = _parse_blame_output(blame_output) - commit_to_year = {} - current_commit = None - - # A robust state machine to parse Git porcelain format - # Porcelain format only prints the 'author-time' once per commit block - # so we must remember the year for each commit has we encounter. - for line in blame_output.splitlines(): - if line.startswith("\t"): - if current_commit in commit_to_year: - age_distribution[commit_to_year[current_commit]] += 1 - else: - parts = line.split(" ") - # A 40 (or 64 for SHA-256) character hash marks the start of a new blame block - if len(parts[0]) in (40, 64): - current_commit = parts[0] - elif parts[0] == "author-time": - try: - timestamp = int(parts[1]) - commit_to_year[current_commit] = datetime.fromtimestamp( - timestamp - ).strftime("%Y") - except (ValueError, OverflowError, IndexError): - pass + # Aggregate the file's age distribution into the snapshot's total + for year, count in file_dist.items(): + age_distribution[year] += count except RuntimeError: - # Skip files that git blame cannot process (e.g., binary files) + # Skip files that git blame cannot process (like binaries) continue return dict(age_distribution) @@ -158,6 +191,7 @@ def load_existing_state(json_fname: str) -> list[dict]: # TODO: Make the main function to tie everything together +@timer def process_repository(repo_slug: str, data_dir: str) -> None: """ Orchestrate the extraction of Ship of Theseus code persistence data @@ -222,7 +256,7 @@ def process_repository(repo_slug: str, data_dir: str) -> None: final_dataset = historical_data + new_data final_dataset.sort(key=lambda x: x["snapshot_date"]) - with open(output_json_path, "w") as f: + with open(output_json_path, "w", encoding="utf-8") as f: json.dump(final_dataset, f, indent=4) print( @@ -242,15 +276,18 @@ def process_repository(repo_slug: str, data_dir: str) -> None: DATA_OUTPUT_DIR = "./data" os.makedirs(DATA_OUTPUT_DIR, exist_ok=True) - # --------------------------------------------------------- - # The Case Studies: Add any public repository you want here! - # --------------------------------------------------------- + # The Case Studies: Start with these three to benchmark. TARGETS = [ "facebook/react", # The modern web standard "vuejs/vue", # A fantastic comparison to React "d3/d3", # The data-viz giant ] + overall_start = time.perf_counter() for target in TARGETS: print(f"\n{'=' * 50}\nStarting analysis pipeline for: {target}\n{'=' * 50}") process_repository(target, DATA_OUTPUT_DIR) + overall_end = time.perf_counter() + print( + f"\n{'=' * 50}\nTOTAL PIPELINE EXECUTION TIME: {overall_end - overall_start:.2f} seconds\n{'=' * 50}" + ) From d7d7aa4a71da2c28d5a8f0e18c47c11f2e75ae70 Mon Sep 17 00:00:00 2001 From: Asif Sayyed Date: Sat, 4 Apr 2026 16:08:24 +0530 Subject: [PATCH 13/22] #1 refactor: implement threadpooling in reduce blame processing time --- data/claude-code_data.json | 111 ++++++++++++++++++++++++++++++++++ scripts/analyse_repository.py | 49 +++++++++------ 2 files changed, 140 insertions(+), 20 deletions(-) create mode 100644 data/claude-code_data.json diff --git a/data/claude-code_data.json b/data/claude-code_data.json new file mode 100644 index 0000000..2e3affd --- /dev/null +++ b/data/claude-code_data.json @@ -0,0 +1,111 @@ +[ + { + "snapshot_date": "2025-02", + "total_lines": 315, + "composition": { + "2025": 315 + } + }, + { + "snapshot_date": "2025-03", + "total_lines": 368, + "composition": { + "2025": 368 + } + }, + { + "snapshot_date": "2025-04", + "total_lines": 455, + "composition": { + "2025": 455 + } + }, + { + "snapshot_date": "2025-05", + "total_lines": 756, + "composition": { + "2025": 756 + } + }, + { + "snapshot_date": "2025-06", + "total_lines": 876, + "composition": { + "2025": 876 + } + }, + { + "snapshot_date": "2025-07", + "total_lines": 77387, + "composition": { + "2025": 77387 + } + }, + { + "snapshot_date": "2025-08", + "total_lines": 77900, + "composition": { + "2025": 77900 + } + }, + { + "snapshot_date": "2025-09", + "total_lines": 78823, + "composition": { + "2025": 78823 + } + }, + { + "snapshot_date": "2025-10", + "total_lines": 56350, + "composition": { + "2025": 56350 + } + }, + { + "snapshot_date": "2025-11", + "total_lines": 60301, + "composition": { + "2025": 60301 + } + }, + { + "snapshot_date": "2025-12", + "total_lines": 86284, + "composition": { + "2025": 86284 + } + }, + { + "snapshot_date": "2026-01", + "total_lines": 86661, + "composition": { + "2025": 86626, + "2026": 35 + } + }, + { + "snapshot_date": "2026-02", + "total_lines": 87073, + "composition": { + "2025": 86420, + "2026": 653 + } + }, + { + "snapshot_date": "2026-03", + "total_lines": 87847, + "composition": { + "2025": 86142, + "2026": 1705 + } + }, + { + "snapshot_date": "2026-04", + "total_lines": 88573, + "composition": { + "2025": 86142, + "2026": 2431 + } + } +] \ No newline at end of file diff --git a/scripts/analyse_repository.py b/scripts/analyse_repository.py index 90316fb..2db5896 100644 --- a/scripts/analyse_repository.py +++ b/scripts/analyse_repository.py @@ -3,6 +3,7 @@ Collects the 12 blames per year for target repository """ +import concurrent.futures import json import os import shutil @@ -134,8 +135,21 @@ def _parse_blame_output(blame_output: str) -> dict[str, int]: return dict(file_distribution) -# FIXME: Optimisation opportunity: use git blame directly on the file list or if the file is larger than 32KB, -# batch it into chunks as that the N would be reduced +def _blame_single_file(repo_path: str, file: str) -> dict[str, int]: + """ + Worker function to run git blame on a single file. + Designed to be run concurrently in a ThreadPool. + """ + try: + blame_output = _run_command( + ["git", "blame", "--line-porcelain", file], cwd=repo_path + ) + return _parse_blame_output(blame_output) + except RuntimeError: + # Skip files that git blame cannot process (like binaries) + return {} + + @timer def analyze_snapshots(repo_path: str, commit_hash: str) -> dict[str, int]: """ @@ -151,24 +165,21 @@ def analyze_snapshots(repo_path: str, commit_hash: str) -> dict[str, int]: age_distribution = defaultdict(int) - for file in files: - file_path = os.path.join(repo_path, file) - if not os.path.isfile(file_path): - continue + valid_files = [f for f in files if os.path.isfile(os.path.join(repo_path, f))] - try: - # --line-porcelain provides machine-readable output including author-time - blame_output = _run_command( - ["git", "blame", "--line-porcelain", file], cwd=repo_path - ) - file_dist = _parse_blame_output(blame_output) + # Use ThreadPoolExecutor to bypass the O(N) sequential subprocess bottleneck. + # Subprocess calls release the GIL, making threading highly effective here. + max_threads = 20 + with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as executor: + future_to_file = { + executor.submit(_blame_single_file, repo_path, file): file + for file in valid_files + } - # Aggregate the file's age distribution into the snapshot's total + for future in concurrent.futures.as_completed(future_to_file): + file_dist = future.result() for year, count in file_dist.items(): age_distribution[year] += count - except RuntimeError: - # Skip files that git blame cannot process (like binaries) - continue return dict(age_distribution) @@ -276,11 +287,9 @@ def process_repository(repo_slug: str, data_dir: str) -> None: DATA_OUTPUT_DIR = "./data" os.makedirs(DATA_OUTPUT_DIR, exist_ok=True) - # The Case Studies: Start with these three to benchmark. + # The Case Studies: Start with these one to benchmark. TARGETS = [ - "facebook/react", # The modern web standard - "vuejs/vue", # A fantastic comparison to React - "d3/d3", # The data-viz giant + "anthropics/claude-code", ] overall_start = time.perf_counter() From 01f031c7afa711ccfdaafdc1318f965bfd771507 Mon Sep 17 00:00:00 2001 From: Asif Sayyed Date: Sat, 4 Apr 2026 16:25:03 +0530 Subject: [PATCH 14/22] #1 refactor: update the versioning for precommit --- .pre-commit-config.yaml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 44210bb..e290340 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,20 +7,20 @@ repos: - id: check-yaml - id: check-added-large-files - - repo: https://github.com/psf/black - rev: 25.9.0 + - repo: local hooks: - id: black + name: black + entry: poetry run black + language: system + types: [python] - - repo: https://github.com/PyCQA/isort - rev: 6.1.0 - hooks: - id: isort name: isort (python) + entry: poetry run isort + language: system + types: [python] - - repo: https://github.com/pylint-dev/pylint - rev: v3.3.8 - hooks: - id: pylint name: pylint entry: poetry run pylint From c84b4e01c371b6d5acc5a4d3998b6be44627cc22 Mon Sep 17 00:00:00 2001 From: Asif Sayyed Date: Sat, 4 Apr 2026 16:26:12 +0530 Subject: [PATCH 15/22] #1 refactor: enforce PEP 508 on the poetry versioning --- pyproject.toml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3690a90..21ed94d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,11 +13,11 @@ dependencies = [ [dependency-groups] dev = [ - "pytest (==9.0.2)", - "pre-commit (>=4.5.1,<5.0.0)", - "isort (>=8.0.1,<9.0.0)", - "black (>=26.3.1,<27.0.0)", - "pylint (>=4.0.5,<5.0.0)" + "pytest==9.0.2", + "pre-commit>=4.5.1,<5.0.0", + "isort>=8.0.1,<9.0.0", + "black>=26.3.1,<27.0.0", + "pylint>=4.0.5,<5.0.0" ] [tool.pylint.format] From 3fc9b4f8b99b9821dd7f5b6eb256be263987efd7 Mon Sep 17 00:00:00 2001 From: Asif Sayyed Date: Sat, 4 Apr 2026 16:27:00 +0530 Subject: [PATCH 16/22] #1 refactor: update poetry core constraint --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 21ed94d..58c6973 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,5 +27,5 @@ max-line-length = 120 disable = ["fixme"] [build-system] -requires = ["poetry-core>=2.0.0,<3.0.0"] +requires = ["poetry-core>=2.2.0,<3.0.0"] build-backend = "poetry.core.masonry.api" From 8d7972e0067de24214e46d314246715ba8a12e7d Mon Sep 17 00:00:00 2001 From: Asif Sayyed Date: Sat, 4 Apr 2026 16:28:42 +0530 Subject: [PATCH 17/22] #1 fix: grammer in docstring --- scripts/analyse_repository.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/analyse_repository.py b/scripts/analyse_repository.py index 2db5896..17ee864 100644 --- a/scripts/analyse_repository.py +++ b/scripts/analyse_repository.py @@ -1,6 +1,6 @@ """ -This script is reposible for doing the heavy lifting. -Collects the 12 blames per year for target repository +This script is responsible for doing the heavy lifting. +Processes monthly snapshots incrementally to track code age distribution. """ import concurrent.futures From f2210cf57b344f49caad8cd0487eed082cc99c43 Mon Sep 17 00:00:00 2001 From: Asif Sayyed Date: Sat, 4 Apr 2026 16:33:23 +0530 Subject: [PATCH 18/22] #1 fix: add error handling for when base is not main but master --- scripts/analyse_repository.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scripts/analyse_repository.py b/scripts/analyse_repository.py index 17ee864..8bf6e7a 100644 --- a/scripts/analyse_repository.py +++ b/scripts/analyse_repository.py @@ -232,7 +232,12 @@ def process_repository(repo_slug: str, data_dir: str) -> None: else: print(f"Repository {repo_name} already exists locally. Fetching latest...") _run_command(["git", "fetch", "--all"], cwd=temp_repo_path) - _run_command(["git", "checkout", "main"], cwd=temp_repo_path) + for branch in ["main", "master"]: + try: + _run_command(["git", "checkout", branch], cwd=temp_repo_path) + break + except RuntimeError: + continue _run_command(["git", "pull"], cwd=temp_repo_path) historical_data = load_existing_state(output_json_path) From 02de6e4e899c418583c7464f5532c84d20e6a434 Mon Sep 17 00:00:00 2001 From: Asif Sayyed Date: Sat, 4 Apr 2026 16:35:37 +0530 Subject: [PATCH 19/22] #1 fix: add timezone to prevent mismatch when running on github actions --- scripts/analyse_repository.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/analyse_repository.py b/scripts/analyse_repository.py index 8bf6e7a..11fdb4a 100644 --- a/scripts/analyse_repository.py +++ b/scripts/analyse_repository.py @@ -10,7 +10,7 @@ import subprocess import time from collections import defaultdict -from datetime import datetime +from datetime import datetime, timezone from functools import wraps @@ -127,7 +127,9 @@ def _parse_blame_output(blame_output: str) -> dict[str, int]: elif parts[0] == "author-time": try: timestamp = int(parts[1]) - year = datetime.fromtimestamp(timestamp).strftime("%Y") + year = datetime.fromtimestamp(timestamp, timezone.utc).strftime( + "%Y" + ) commit_to_year[current_commit] = year except (ValueError, IndexError): pass From 3a35d0ea3e3f3cdd68c077d5f3d2700db85568e2 Mon Sep 17 00:00:00 2001 From: Asif Sayyed Date: Sat, 4 Apr 2026 16:45:05 +0530 Subject: [PATCH 20/22] #1 ci: add testing scripts and workflow --- .github/workflows/integration-tests.yml | 30 ++++++ tests/test_analyse_repository.py | 130 ++++++++++++++++++++++++ 2 files changed, 160 insertions(+) create mode 100644 .github/workflows/integration-tests.yml create mode 100644 tests/test_analyse_repository.py diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml new file mode 100644 index 0000000..7e5f0fa --- /dev/null +++ b/.github/workflows/integration-tests.yml @@ -0,0 +1,30 @@ +name: Integration Tests + +on: + push: + branches: [main, develop] + pull_request: + branches: [main, develop] + +jobs: + test: + name: Run Integration Tests + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install Poetry + run: pipx install poetry + + - name: Install dependencies + run: poetry install --with dev + + - name: Run tests + run: poetry run pytest tests/ -v --tb=short diff --git a/tests/test_analyse_repository.py b/tests/test_analyse_repository.py new file mode 100644 index 0000000..2e4b547 --- /dev/null +++ b/tests/test_analyse_repository.py @@ -0,0 +1,130 @@ +import json +import os +import sys +import tempfile +from datetime import datetime, timezone + +import pytest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from scripts.analyse_repository import ( + _parse_blame_output, + load_existing_state, +) + + +class TestParseBlameOutput: + """Tests for the git blame output parser.""" + + def test_single_file_single_author_year(self): + blame_output = ( + "abc123def4567890123456789012345678901234 1 1 1\n" + "author Test Author\n" + "author-time 1704067200\n" + "filename test.py\n" + "\tprint('hello world')\n" + ) + result = _parse_blame_output(blame_output) + year = datetime.fromtimestamp(1704067200, timezone.utc).strftime("%Y") + assert result == {year: 1} + + def test_multiple_commits_different_years(self): + blame_output = ( + "abc123def4567890123456789012345678901234 1 1 1\n" + "author Test Author\n" + "author-time 1609459200\n" + "filename test.py\n" + "\tconst x = 1;\n" + "def4567890123456789012345678901234567890 2 2 1\n" + "author Another Author\n" + "author-time 1704067200\n" + "filename test.py\n" + "\tconst y = 2;\n" + ) + result = _parse_blame_output(blame_output) + year_2021 = datetime.fromtimestamp(1609459200, timezone.utc).strftime("%Y") + year_2024 = datetime.fromtimestamp(1704067200, timezone.utc).strftime("%Y") + assert result[year_2021] == 1 + assert result[year_2024] == 1 + + def test_lines_attributed_to_correct_year(self): + blame_output = ( + "abc123def4567890123456789012345678901234 1 1 1\n" + "author Test Author\n" + "author-time 1609459200\n" + "filename test.py\n" + "\tline one\n" + "\tline two\n" + "\tline three\n" + ) + result = _parse_blame_output(blame_output) + year = datetime.fromtimestamp(1609459200, timezone.utc).strftime("%Y") + assert result[year] == 3 + + def test_empty_output(self): + result = _parse_blame_output("") + assert result == {} + + def test_invalid_timestamp_ignored(self): + blame_output = ( + "abc123def4567890123456789012345678901234 1 1 1\n" + "author Test Author\n" + "author-time not_a_number\n" + "filename test.py\n" + "\tprint('hello')\n" + ) + result = _parse_blame_output(blame_output) + assert result == {} + + def test_40_and_64_char_hashes(self): + blame_output = ( + "abc123def4567890123456789012345678901234 1 1 1\n" + "author Test Author\n" + "author-time 1704067200\n" + "filename test.py\n" + "\tprint('hello')\n" + ) + result = _parse_blame_output(blame_output) + year = datetime.fromtimestamp(1704067200, timezone.utc).strftime("%Y") + assert year in result + + +class TestLoadExistingState: + """Tests for loading existing JSON state.""" + + def test_load_valid_json(self): + data = [ + { + "snapshot_date": "2024-01", + "total_lines": 100, + "composition": {"2020": 100}, + }, + { + "snapshot_date": "2024-02", + "total_lines": 150, + "composition": {"2020": 150}, + }, + ] + with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f: + json.dump(data, f) + f.flush() + + result = load_existing_state(f.name) + assert len(result) == 2 + assert result[0]["snapshot_date"] == "2024-01" + + os.unlink(f.name) + + def test_file_not_exists(self): + result = load_existing_state("/nonexistent/path/data.json") + assert result == [] + + def test_corrupted_json_returns_empty(self): + with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f: + f.write("not valid json {") + f.flush() + + result = load_existing_state(f.name) + assert result == [] + + os.unlink(f.name) From 7d0d2e0741b778eb853bad9cefd0bd2808514918 Mon Sep 17 00:00:00 2001 From: Asif Sayyed Date: Sat, 4 Apr 2026 16:46:07 +0530 Subject: [PATCH 21/22] #1 style: fix typing error --- scripts/analyse_repository.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/analyse_repository.py b/scripts/analyse_repository.py index 11fdb4a..12519b9 100644 --- a/scripts/analyse_repository.py +++ b/scripts/analyse_repository.py @@ -34,7 +34,7 @@ def wrapper(*args, **kwargs): def _run_command(cmd: list[str], cwd: str | None = None) -> str: """ - Execute a shell command and return it's standard output + Execute a shell command and return its standard output :param cmd: List of arguments forming the command. :param cwd: Directory path where the command should be executed. @@ -294,7 +294,7 @@ def process_repository(repo_slug: str, data_dir: str) -> None: DATA_OUTPUT_DIR = "./data" os.makedirs(DATA_OUTPUT_DIR, exist_ok=True) - # The Case Studies: Start with these one to benchmark. + # The Case Studies: Start with this one to benchmark. TARGETS = [ "anthropics/claude-code", ] From 1a7a08ca6d6377bb0e4863d3e04ef75c973e315e Mon Sep 17 00:00:00 2001 From: Asif Sayyed Date: Sat, 4 Apr 2026 16:47:50 +0530 Subject: [PATCH 22/22] #1 refactor(ci): add --no-root flag --- .github/workflows/integration-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index 7e5f0fa..6b4afe5 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -24,7 +24,7 @@ jobs: run: pipx install poetry - name: Install dependencies - run: poetry install --with dev + run: poetry install --no-root --with dev - name: Run tests run: poetry run pytest tests/ -v --tb=short