From ec90d6593ebc1ebb54d13466e43958d9ea8aa2a4 Mon Sep 17 00:00:00 2001 From: Asif Sayyed Date: Fri, 29 May 2026 23:37:59 +0530 Subject: [PATCH 1/9] #33 fixed some major breaking changes --- data/claude-code_data.json | 1 - data/langchain_data.json | 1 - data/numpy_data.json | 1 - data/react_data.json | 1 - data/zed_data.json | 1 - scripts/__init__.py | 0 scripts/_utils.py | 84 ++++++++++++++++++++ scripts/add_fossils.py | 143 +++++++++++++++++++--------------- scripts/analyse_repository.py | 62 ++++++--------- 9 files changed, 188 insertions(+), 106 deletions(-) delete mode 100644 data/claude-code_data.json delete mode 100644 data/langchain_data.json delete mode 100644 data/numpy_data.json delete mode 100644 data/react_data.json delete mode 100644 data/zed_data.json create mode 100644 scripts/__init__.py create mode 100644 scripts/_utils.py diff --git a/data/claude-code_data.json b/data/claude-code_data.json deleted file mode 100644 index 53b53bb..0000000 --- a/data/claude-code_data.json +++ /dev/null @@ -1 +0,0 @@ -{"snapshots":[{"snapshot_date":"2025-02","composition":{"2025":315}},{"snapshot_date":"2025-03","composition":{"2025":368}},{"snapshot_date":"2025-04","composition":{"2025":455}},{"snapshot_date":"2025-05","composition":{"2025":756}},{"snapshot_date":"2025-06","composition":{"2025":876}},{"snapshot_date":"2025-07","composition":{"2025":77387}},{"snapshot_date":"2025-08","composition":{"2025":77900}},{"snapshot_date":"2025-09","composition":{"2025":78823}},{"snapshot_date":"2025-10","composition":{"2025":56350}},{"snapshot_date":"2025-11","composition":{"2025":60301}},{"snapshot_date":"2025-12","composition":{"2025":86284}},{"snapshot_date":"2026-01","composition":{"2025":86626,"2026":35}},{"snapshot_date":"2026-02","composition":{"2025":86420,"2026":653}},{"snapshot_date":"2026-03","composition":{"2025":86142,"2026":1705}},{"snapshot_date":"2026-04","composition":{"2025":86142,"2026":2431}},{"snapshot_date":"2026-05","composition":{"2026":3361,"2025":86136}}],"fossils":{"genesis":{"timestamp":1740245022,"file":".devcontainer/Dockerfile","content":"FROM node:20","year":"2025","commit":"bd5ca70","view_commit":"bd5ca708adf82c4b81857abf40fe36d9d9cc3d1c","line":1},"survivor":{"timestamp":1740245022,"file":"README.md","content":"When you use Claude Code, we collect feedback, which includes usage data (such as code acceptance or rejections), associated conversation data, and user feedback submitted via the `/bug` command.","year":"2025","commit":"bd5ca70","view_commit":"main","line":62}}} \ No newline at end of file diff --git a/data/langchain_data.json b/data/langchain_data.json deleted file mode 100644 index b2267fd..0000000 --- a/data/langchain_data.json +++ /dev/null @@ -1 +0,0 @@ -{"snapshots":[{"snapshot_date":"2022-12","composition":{"2022":15774}},{"snapshot_date":"2023-03","composition":{"2022":28253,"2023":61969}},{"snapshot_date":"2023-06","composition":{"2023":487975,"2022":21681}},{"snapshot_date":"2023-09","composition":{"2023":681274,"2022":16291}},{"snapshot_date":"2023-12","composition":{"2023":881194,"2022":14704}},{"snapshot_date":"2024-03","composition":{"2024":357436,"2023":944924,"2022":10842}},{"snapshot_date":"2024-06","composition":{"2023":826677,"2024":645423,"2022":7685}},{"snapshot_date":"2024-09","composition":{"2023":686097,"2024":664907,"2022":7295}},{"snapshot_date":"2024-12","composition":{"2024":683157,"2023":546569,"2022":7138}},{"snapshot_date":"2025-01","composition":{"2024":720654,"2023":544169,"2022":7122,"2025":5}},{"snapshot_date":"2025-02","composition":{"2023":542683,"2024":707703,"2025":56681,"2022":7106}},{"snapshot_date":"2025-03","composition":{"2023":533418,"2024":657044,"2025":124237,"2022":6529}},{"snapshot_date":"2025-04","composition":{"2023":491914,"2024":630484,"2025":129671,"2022":6346}},{"snapshot_date":"2025-05","composition":{"2024":448680,"2025":144491,"2023":315772,"2022":5672}},{"snapshot_date":"2025-06","composition":{"2023":313764,"2024":444312,"2025":158141,"2022":5671}},{"snapshot_date":"2025-07","composition":{"2023":313208,"2024":441747,"2025":170384,"2022":5671}},{"snapshot_date":"2025-08","composition":{"2025":203640,"2023":308982,"2024":435295,"2022":5429}},{"snapshot_date":"2025-09","composition":{"2025":222619,"2023":308625,"2024":433511,"2022":5427}},{"snapshot_date":"2025-10","composition":{"2023":306859,"2025":253209,"2024":429458,"2022":5409}},{"snapshot_date":"2025-11","composition":{"2025":213274,"2023":83597,"2024":138301,"2022":3632}},{"snapshot_date":"2025-12","composition":{"2025":213376,"2023":83259,"2024":137738,"2022":3489}},{"snapshot_date":"2026-01","composition":{"2025":221792,"2023":82989,"2024":136828,"2022":3485,"2026":268}},{"snapshot_date":"2026-02","composition":{"2024":128469,"2023":81140,"2025":207607,"2026":12545,"2022":3485}},{"snapshot_date":"2026-03","composition":{"2023":81121,"2025":198803,"2024":126788,"2026":35860,"2022":3484}},{"snapshot_date":"2026-04","composition":{"2025":195106,"2023":81079,"2024":126636,"2026":47792,"2022":3475}},{"snapshot_date":"2026-05","composition":{"2025":194140,"2026":72120,"2023":125330,"2024":126474,"2022":3475}}],"fossils":{"genesis":{"timestamp":1666648275,"file":".flake8","content":"[flake8]","year":"2022","commit":"18aeb72","view_commit":"18aeb720126a68201c7e3b5a617139c27c779496","line":1},"survivor":{"timestamp":1666648275,"file":".github/workflows/_lint.yml","content":"jobs:","year":"2022","commit":"18aeb72","view_commit":"master","line":33}}} \ No newline at end of file diff --git a/data/numpy_data.json b/data/numpy_data.json deleted file mode 100644 index 595c496..0000000 --- a/data/numpy_data.json +++ /dev/null @@ -1 +0,0 @@ -{"snapshots":[{"snapshot_date":"2001-12","composition":{"2001":1865}},{"snapshot_date":"2002-03","composition":{"2002":94339,"2001":1472}},{"snapshot_date":"2002-06","composition":{"2002":102869,"2001":1179}},{"snapshot_date":"2002-09","composition":{"2002":130360,"2001":1167}},{"snapshot_date":"2002-12","composition":{"2002":132966,"2001":1130}},{"snapshot_date":"2003-03","composition":{"2003":2305,"2002":132607,"2001":1052}},{"snapshot_date":"2003-06","composition":{"2002":132569,"2003":2688,"2001":1047}},{"snapshot_date":"2003-09","composition":{"2003":3793,"2002":132461,"2001":1036}},{"snapshot_date":"2003-12","composition":{"2003":5328,"2002":131017,"2001":1009}},{"snapshot_date":"2004-03","composition":{"2003":3964,"2002":129547,"2004":5960,"2001":449}},{"snapshot_date":"2004-06","composition":{"2004":9689,"2002":129500,"2003":3916,"2001":443}},{"snapshot_date":"2004-09","composition":{"2003":3868,"2002":128923,"2004":10562,"2001":443}},{"snapshot_date":"2004-12","composition":{"2004":13272,"2002":128551,"2003":3680,"2001":437}},{"snapshot_date":"2005-03","composition":{"2002":128546,"2004":13125,"2003":3676,"2005":352,"2001":437}},{"snapshot_date":"2005-06","composition":{"2004":13106,"2005":1801,"2002":128527,"2003":3655,"2001":437}},{"snapshot_date":"2005-09","composition":{"2005":150609,"2002":120948,"2004":2743,"2003":2178}},{"snapshot_date":"2005-12","composition":{"2005":192483,"2002":95435,"2004":2503,"2003":1868}},{"snapshot_date":"2006-03","composition":{"2005":147165,"2006":24364,"2002":1852,"2004":152,"2003":357}},{"snapshot_date":"2006-06","composition":{"2006":42885,"2005":144356,"2002":1837,"2004":148,"2003":355}},{"snapshot_date":"2006-09","composition":{"2005":135193,"2006":106620,"2002":1809,"2004":145,"2003":339}},{"snapshot_date":"2006-12","composition":{"2006":82177,"2005":134843,"2002":1803,"2004":144,"2003":339}},{"snapshot_date":"2007-03","composition":{"2006":76164,"2007":13620,"2005":134590,"2002":1749,"2004":144,"2003":333}},{"snapshot_date":"2007-06","composition":{"2006":74427,"2007":21742,"2005":133132,"2002":1747,"2004":144,"2003":333}},{"snapshot_date":"2007-09","composition":{"2006":61136,"2005":124879,"2007":49906,"2002":1742,"2004":144,"2003":333}},{"snapshot_date":"2007-12","composition":{"2007":58304,"2006":60016,"2005":123821,"2002":1739,"2004":144,"2003":332}},{"snapshot_date":"2008-03","composition":{"2006":58723,"2007":61831,"2005":122014,"2008":11648,"2002":1736,"2004":144,"2003":328}},{"snapshot_date":"2008-06","composition":{"2007":54121,"2006":52922,"2008":35857,"2005":120599,"2002":1602,"2004":141,"2003":316}},{"snapshot_date":"2008-09","composition":{"2008":136007,"2006":33511,"2007":46987,"2005":119750,"2002":1353,"2004":138,"2003":281}},{"snapshot_date":"2008-12","composition":{"2007":45694,"2006":33032,"2005":118888,"2008":162098,"2002":1352,"2004":138,"2003":281}},{"snapshot_date":"2009-03","composition":{"2005":118010,"2006":31300,"2007":40897,"2009":45419,"2008":155221,"2002":1351,"2004":138,"2003":278}},{"snapshot_date":"2009-06","composition":{"2007":30084,"2006":26011,"2009":87758,"2005":116213,"2008":148517,"2002":1346,"2004":138,"2003":278}},{"snapshot_date":"2009-09","composition":{"2008":146944,"2009":100827,"2007":29821,"2006":25778,"2005":116065,"2002":1158,"2004":136,"2003":278}},{"snapshot_date":"2009-12","composition":{"2005":113855,"2009":145000,"2007":28736,"2006":25011,"2008":143661,"2002":1158,"2004":136,"2003":273}},{"snapshot_date":"2010-03","composition":{"2009":136927,"2006":24474,"2008":142180,"2005":111850,"2007":28449,"2010":17329,"2002":1064,"2004":123,"2003":259}},{"snapshot_date":"2010-06","composition":{"2007":28023,"2005":111719,"2009":136179,"2008":141950,"2006":23646,"2010":23093,"2002":1057,"2004":123,"2003":259}},{"snapshot_date":"2010-09","composition":{"2010":28562,"2006":23589,"2007":25037,"2008":141483,"2005":111669,"2009":135214,"2002":1055,"2004":123,"2003":259}},{"snapshot_date":"2010-12","composition":{"2008":140070,"2009":125151,"2006":23469,"2010":43152,"2005":111642,"2007":24957,"2002":1046,"2004":123,"2003":259}},{"snapshot_date":"2011-03","composition":{"2010":49445,"2009":118996,"2006":23132,"2007":23654,"2005":111319,"2008":138780,"2011":38997,"2002":1039,"2004":123,"2003":257}},{"snapshot_date":"2011-06","composition":{"2010":48692,"2008":138338,"2011":58195,"2009":115191,"2006":22996,"2005":111133,"2007":23524,"2002":995,"2004":122,"2003":257}},{"snapshot_date":"2011-09","composition":{"2011":88031,"2010":46487,"2008":137097,"2009":112417,"2007":22880,"2006":22699,"2005":110990,"2002":995,"2004":121,"2003":257}},{"snapshot_date":"2011-12","composition":{"2010":46148,"2011":92965,"2008":137041,"2009":110755,"2007":22841,"2005":110983,"2006":22541,"2002":995,"2004":121,"2003":257}},{"snapshot_date":"2012-03","composition":{"2010":42510,"2011":95632,"2008":136391,"2009":108206,"2007":22505,"2006":22193,"2005":110401,"2012":12933,"2002":995,"2004":121,"2003":257}},{"snapshot_date":"2012-06","composition":{"2012":20487,"2010":41767,"2011":82718,"2005":110335,"2006":21976,"2007":22419,"2009":107297,"2008":136224,"2002":990,"2004":121,"2003":255}},{"snapshot_date":"2012-09","composition":{"2011":79589,"2007":22074,"2006":21736,"2010":41149,"2012":30421,"2008":135866,"2009":105324,"2005":110282,"2002":990,"2004":121,"2003":255}},{"snapshot_date":"2012-12","composition":{"2011":79086,"2010":40909,"2012":32983,"2008":135824,"2009":104919,"2006":21734,"2007":22062,"2005":110248,"2002":990,"2004":121,"2003":255}},{"snapshot_date":"2013-03","composition":{"2012":29512,"2010":40751,"2011":77641,"2013":7097,"2006":21520,"2009":103144,"2008":134237,"2005":109882,"2007":21603,"2002":989,"2004":121,"2003":254}},{"snapshot_date":"2013-06","composition":{"2012":183454,"2010":39837,"2013":24040,"2011":77317,"2008":133562,"2009":101929,"2005":35090,"2006":21080,"2007":21351,"2002":987,"2004":120,"2003":253}},{"snapshot_date":"2013-09","composition":{"2012":181910,"2010":37648,"2013":44136,"2011":75123,"2008":127205,"2009":97164,"2006":14587,"2005":33008,"2007":20078,"2002":711,"2004":78,"2003":252}},{"snapshot_date":"2013-12","composition":{"2012":180411,"2013":46300,"2010":37466,"2011":74810,"2006":14451,"2009":96743,"2005":32818,"2007":19912,"2008":126806,"2002":709,"2004":77,"2003":252}},{"snapshot_date":"2014-03","composition":{"2010":36748,"2012":178530,"2013":43606,"2011":73040,"2014":18240,"2005":30273,"2006":13029,"2007":19769,"2008":45858,"2009":94894,"2002":696,"2004":77,"2003":252}},{"snapshot_date":"2014-06","composition":{"2012":171754,"2014":13714,"2013":41708,"2011":70672,"2010":36188,"2009":87451,"2005":30161,"2008":45113,"2007":18289,"2006":12022,"2002":695,"2004":77,"2003":252}},{"snapshot_date":"2014-09","composition":{"2012":171112,"2014":22362,"2013":40712,"2010":35603,"2011":69888,"2008":44758,"2009":86034,"2006":11875,"2005":29302,"2007":18123,"2002":635,"2004":60,"2003":252}},{"snapshot_date":"2014-12","composition":{"2013":40593,"2010":35489,"2014":25570,"2012":170931,"2011":69766,"2008":44138,"2009":85660,"2005":29175,"2006":11771,"2007":17939,"2002":634,"2004":60,"2003":252}},{"snapshot_date":"2015-03","composition":{"2011":69486,"2013":39957,"2010":35074,"2015":6679,"2012":170636,"2014":26701,"2008":43679,"2009":84814,"2005":29095,"2006":11512,"2007":17877,"2002":634,"2004":60,"2003":252}},{"snapshot_date":"2015-06","composition":{"2012":170329,"2015":12670,"2014":27726,"2013":39699,"2010":34951,"2011":68961,"2006":11386,"2009":84282,"2005":29020,"2008":43249,"2007":17747,"2002":631,"2004":60,"2003":252}},{"snapshot_date":"2015-09","composition":{"2010":34469,"2014":27200,"2015":29669,"2012":169803,"2013":37171,"2011":67345,"2008":41734,"2009":83642,"2006":10984,"2005":26685,"2007":16829,"2002":630,"2004":56,"2003":234}},{"snapshot_date":"2015-12","composition":{"2015":35389,"2013":37010,"2010":34274,"2014":26998,"2012":169642,"2011":66650,"2006":10946,"2008":41411,"2009":83082,"2005":26566,"2007":16737,"2002":629,"2004":56,"2003":234}},{"snapshot_date":"2016-03","composition":{"2013":36586,"2012":169383,"2015":36532,"2016":7776,"2010":34125,"2014":26521,"2008":41062,"2009":82599,"2011":65855,"2005":26355,"2007":16480,"2006":10683,"2002":629,"2004":56,"2003":234}},{"snapshot_date":"2016-06","composition":{"2013":36522,"2015":36608,"2016":10759,"2010":34002,"2014":26371,"2012":169365,"2011":65773,"2008":40860,"2009":82399,"2006":10650,"2005":26319,"2007":16317,"2002":623,"2004":56,"2003":234}},{"snapshot_date":"2016-09","composition":{"2015":36237,"2012":169253,"2016":15735,"2010":33913,"2014":26125,"2013":36265,"2011":65666,"2008":40740,"2009":82280,"2006":10609,"2005":26245,"2007":16285,"2002":623,"2004":56,"2003":234}},{"snapshot_date":"2016-12","composition":{"2012":169155,"2015":36071,"2016":21595,"2014":25757,"2013":35985,"2010":33535,"2011":65332,"2005":26216,"2009":82159,"2008":40607,"2006":10541,"2007":16223,"2002":622,"2004":56,"2003":233}},{"snapshot_date":"2017-03","composition":{"2014":25576,"2017":110922,"2015":35648,"2012":62863,"2016":39453,"2013":35176,"2010":33095,"2011":65019,"2008":39631,"2009":68413,"2005":25919,"2007":16089,"2006":10382,"2002":570,"2004":51,"2003":233}},{"snapshot_date":"2017-06","composition":{"2014":24843,"2017":120751,"2010":32885,"2015":35392,"2016":38979,"2012":62775,"2013":33867,"2011":64813,"2006":10225,"2008":39305,"2005":17793,"2009":66773,"2007":15934,"2002":563,"2004":49,"2003":233}},{"snapshot_date":"2017-09","composition":{"2014":24395,"2017":133110,"2015":34827,"2013":33024,"2010":32136,"2016":38009,"2012":62537,"2011":63614,"2008":38254,"2009":65072,"2005":17186,"2006":10061,"2007":15636,"2002":559,"2004":49,"2003":233}},{"snapshot_date":"2017-12","composition":{"2017":149940,"2015":34262,"2016":37400,"2014":24265,"2013":32565,"2012":62341,"2010":31890,"2011":63048,"2006":9933,"2008":37707,"2005":17051,"2009":64466,"2007":15468,"2002":559,"2004":49,"2003":233}},{"snapshot_date":"2018-03","composition":{"2018":5510,"2015":33944,"2017":150500,"2016":36999,"2012":61939,"2014":22677,"2013":32357,"2010":31760,"2011":62549,"2008":37535,"2009":64198,"2006":9853,"2005":16919,"2007":15411,"2002":559,"2004":49,"2003":233}},{"snapshot_date":"2018-06","composition":{"2018":19708,"2017":150670,"2015":33690,"2016":36365,"2012":61569,"2014":22274,"2010":31401,"2013":31673,"2011":61256,"2006":9774,"2008":36911,"2009":63381,"2005":16840,"2007":15269,"2002":550,"2004":49,"2003":232}},{"snapshot_date":"2018-09","composition":{"2018":30527,"2014":21992,"2017":150375,"2015":33569,"2016":36174,"2012":61426,"2010":31239,"2013":31339,"2011":60402,"2005":16558,"2008":36478,"2007":15145,"2006":9397,"2009":62752,"2002":543,"2004":49,"2003":232}},{"snapshot_date":"2018-12","composition":{"2018":48265,"2017":149585,"2015":32778,"2016":35741,"2014":21809,"2013":31020,"2012":61190,"2006":9063,"2008":35767,"2009":60872,"2005":14910,"2007":15047,"2010":31055,"2011":59340,"2002":539,"2004":49,"2003":232}},{"snapshot_date":"2019-03","composition":{"2017":149531,"2015":32514,"2018":49111,"2016":35564,"2014":21280,"2019":9272,"2012":60851,"2010":30642,"2013":30072,"2011":58341,"2008":35532,"2009":60367,"2006":9016,"2005":14835,"2007":15001,"2002":539,"2004":49,"2003":232}},{"snapshot_date":"2019-06","composition":{"2017":149695,"2015":32943,"2018":41877,"2016":36017,"2014":21938,"2013":31076,"2010":31147,"2012":61227,"2011":59492,"2006":9065,"2005":16430,"2009":61872,"2008":36237,"2007":15084,"2002":539,"2004":49,"2019":64,"2003":232}},{"snapshot_date":"2019-09","composition":{"2018":66238,"2019":47674,"2014":20457,"2017":149155,"2013":29628,"2012":60316,"2015":31132,"2016":34764,"2010":30023,"2011":57507,"2006":8623,"2005":12783,"2009":57563,"2008":33249,"2007":14521,"2002":538,"2004":49,"2003":231}},{"snapshot_date":"2019-12","composition":{"2018":65394,"2019":56820,"2015":30831,"2014":19795,"2017":148850,"2008":32874,"2016":34561,"2009":57304,"2012":60140,"2013":29363,"2010":29927,"2011":57286,"2005":12717,"2007":14497,"2006":8557,"2002":537,"2004":49,"2003":231}},{"snapshot_date":"2020-03","composition":{"2018":64016,"2019":57180,"2020":24171,"2014":19448,"2017":147161,"2015":30240,"2013":28115,"2012":59428,"2016":33939,"2008":32306,"2009":55012,"2010":27781,"2011":55546,"2006":8293,"2005":12409,"2007":14306,"2002":531,"2004":49,"2003":230}},{"snapshot_date":"2020-06","composition":{"2019":56788,"2018":63130,"2020":48891,"2014":19034,"2017":147034,"2008":32160,"2016":33750,"2015":30033,"2009":54186,"2013":27979,"2010":27613,"2012":59098,"2011":54685,"2005":12251,"2007":14291,"2006":8235,"2002":531,"2004":49,"2003":230}},{"snapshot_date":"2020-09","composition":{"2019":55422,"2020":77573,"2018":62241,"2015":29338,"2014":18764,"2017":145552,"2010":19266,"2016":33581,"2012":58501,"2013":27478,"2011":51167,"2006":8067,"2008":30427,"2009":47014,"2005":12176,"2007":14242,"2002":527,"2004":49,"2003":230}},{"snapshot_date":"2020-12","composition":{"2018":61019,"2019":54317,"2020":114522,"2014":18594,"2017":145129,"2015":28817,"2010":19070,"2016":33480,"2012":58445,"2013":26876,"2011":50577,"2008":30008,"2009":46735,"2006":8047,"2005":12142,"2007":14182,"2002":526,"2004":49,"2003":230}},{"snapshot_date":"2021-03","composition":{"2018":61349,"2020":104729,"2019":54845,"2014":18621,"2017":145434,"2015":29101,"2012":58457,"2016":33526,"2010":19104,"2013":27231,"2011":50614,"2008":30056,"2009":46855,"2006":8051,"2005":12145,"2007":14189,"2021":2222,"2002":527,"2004":49,"2003":230}},{"snapshot_date":"2021-06","composition":{"2020":112028,"2021":45055,"2018":59930,"2019":53018,"2015":28456,"2010":18935,"2016":33218,"2012":58314,"2017":144293,"2014":18363,"2013":26466,"2011":47525,"2006":8013,"2008":30260,"2009":46200,"2005":12104,"2007":14141,"2002":524,"2004":49,"2003":230}},{"snapshot_date":"2021-09","composition":{"2018":59052,"2021":97402,"2019":52650,"2020":109267,"2015":27816,"2010":18656,"2016":33061,"2012":58200,"2017":143713,"2014":17505,"2013":25955,"2011":46683,"2006":7931,"2008":29288,"2009":45809,"2005":11659,"2007":13859,"2002":519,"2004":48,"2003":230}},{"snapshot_date":"2021-12","composition":{"2020":106648,"2021":121834,"2018":58489,"2019":52144,"2015":27360,"2010":17939,"2016":32977,"2012":57388,"2017":143312,"2014":16983,"2013":25758,"2011":46229,"2008":29088,"2006":7907,"2005":11544,"2009":45709,"2007":13832,"2002":519,"2004":48,"2003":230}},{"snapshot_date":"2022-03","composition":{"2021":125502,"2018":58156,"2019":51279,"2020":105366,"2022":17765,"2015":26951,"2012":56311,"2016":32844,"2013":25031,"2010":17808,"2017":143069,"2014":16635,"2011":46017,"2008":28815,"2009":45556,"2005":11537,"2007":13822,"2006":7895,"2002":519,"2004":48,"2003":230}},{"snapshot_date":"2022-06","composition":{"2018":57807,"2021":126196,"2020":104264,"2019":50945,"2022":37665,"2015":26535,"2010":17599,"2016":32719,"2012":55823,"2017":142586,"2014":16534,"2013":24376,"2011":45697,"2008":28642,"2009":44994,"2005":11438,"2006":7797,"2007":13733,"2002":516,"2004":48,"2003":229}},{"snapshot_date":"2022-09","composition":{"2020":103997,"2018":57726,"2021":125728,"2022":42874,"2019":50800,"2015":26468,"2010":17534,"2016":32655,"2012":55811,"2017":142510,"2014":16422,"2013":24318,"2011":45687,"2006":7794,"2008":28526,"2009":44517,"2005":11438,"2007":13668,"2002":516,"2004":48,"2003":229}},{"snapshot_date":"2022-12","composition":{"2022":55955,"2018":57356,"2019":50605,"2021":124787,"2020":103290,"2015":25906,"2010":17490,"2016":32562,"2012":55746,"2017":142114,"2014":16365,"2013":23784,"2011":45494,"2006":7761,"2005":11405,"2009":44247,"2008":28450,"2007":13577,"2002":516,"2004":48,"2003":229}},{"snapshot_date":"2023-03","composition":{"2022":59168,"2023":10358,"2021":120524,"2018":56770,"2020":100976,"2019":49742,"2015":25710,"2008":28012,"2016":32315,"2009":44088,"2014":16282,"2017":140354,"2010":17341,"2012":55648,"2013":23482,"2011":45145,"2006":7683,"2005":11389,"2007":13558,"2002":516,"2004":48,"2003":228}},{"snapshot_date":"2023-06","composition":{"2022":58437,"2023":34207,"2021":107984,"2020":100173,"2018":56373,"2019":49163,"2015":25495,"2010":16797,"2016":32250,"2012":55502,"2017":139945,"2014":16125,"2013":23318,"2011":44806,"2008":27784,"2009":43728,"2006":7209,"2005":11334,"2007":13493,"2002":516,"2004":48,"2003":227}},{"snapshot_date":"2023-09","composition":{"2022":55700,"2023":62491,"2021":100401,"2020":94581,"2018":55435,"2019":47753,"2015":24632,"2016":30433,"2017":138546,"2012":55265,"2010":16400,"2014":15672,"2013":22552,"2011":44120,"2008":26921,"2009":41512,"2006":6928,"2005":11158,"2007":13056,"2004":48,"2003":220,"2002":509}},{"snapshot_date":"2023-12","composition":{"2022":54599,"2023":91904,"2021":98879,"2018":53945,"2020":93516,"2019":47067,"2015":24156,"2010":15736,"2016":30177,"2012":54822,"2017":137828,"2014":15179,"2013":21937,"2011":43821,"2006":6464,"2008":26233,"2009":40323,"2005":10910,"2007":12624,"2004":31,"2003":217,"2002":486}},{"snapshot_date":"2024-03","composition":{"2018":50761,"2019":46690,"2022":52839,"2023":83911,"2021":93662,"2020":92785,"2024":30118,"2015":24044,"2010":15661,"2016":30014,"2012":54601,"2017":137644,"2014":15119,"2013":21678,"2011":43446,"2006":6389,"2008":26095,"2005":10883,"2009":39081,"2007":12600,"2004":31,"2003":217,"2002":485}},{"snapshot_date":"2024-06","composition":{"2023":82879,"2024":39429,"2022":52334,"2021":93118,"2018":50598,"2020":92278,"2019":46371,"2015":23986,"2010":15623,"2016":29981,"2012":54472,"2017":137433,"2014":15091,"2013":21640,"2011":43348,"2005":10869,"2008":26022,"2007":12593,"2009":39036,"2006":6379,"2004":31,"2003":217,"2002":485}},{"snapshot_date":"2024-09","composition":{"2023":81060,"2024":58495,"2022":51245,"2021":91299,"2018":49868,"2015":23851,"2019":45868,"2020":91654,"2010":15329,"2016":29872,"2012":54407,"2017":137201,"2014":14959,"2013":21558,"2011":42777,"2006":6362,"2008":25874,"2009":38030,"2005":10829,"2007":12568,"2004":31,"2003":217,"2002":485}},{"snapshot_date":"2024-12","composition":{"2022":50651,"2023":79580,"2020":90805,"2021":89441,"2024":79718,"2018":49399,"2019":45459,"2015":23478,"2010":15070,"2016":29631,"2012":54260,"2017":136825,"2014":14558,"2013":20957,"2011":40920,"2006":6288,"2008":25636,"2009":37605,"2005":10787,"2007":12206,"2004":23,"2003":211,"2002":469}},{"snapshot_date":"2025-01","composition":{"2021":89243,"2022":50615,"2023":79451,"2018":49378,"2024":80020,"2020":90749,"2015":23478,"2019":45451,"2025":2383,"2010":15065,"2016":29621,"2012":54234,"2017":136711,"2014":14557,"2013":20932,"2011":40903,"2008":25635,"2009":37562,"2006":6288,"2005":10787,"2007":12206,"2004":23,"2003":211,"2002":469}},{"snapshot_date":"2025-02","composition":{"2022":50496,"2023":79170,"2021":89103,"2025":6210,"2024":79734,"2018":49255,"2020":90244,"2019":45295,"2015":23474,"2012":54148,"2016":29571,"2017":136630,"2010":15058,"2014":14553,"2013":20915,"2011":40815,"2006":6278,"2005":10787,"2008":25633,"2007":12204,"2009":37544,"2004":23,"2003":211,"2002":469}},{"snapshot_date":"2025-03","composition":{"2022":49804,"2023":78817,"2021":88041,"2024":78977,"2025":12625,"2018":49216,"2019":45287,"2020":90074,"2015":23424,"2010":15051,"2016":29570,"2012":54148,"2017":136627,"2014":14545,"2013":20864,"2011":40815,"2006":6278,"2008":25623,"2009":37543,"2005":10787,"2007":11959,"2004":23,"2003":211,"2002":469}},{"snapshot_date":"2025-04","composition":{"2018":49092,"2023":77780,"2025":19503,"2024":77274,"2021":86816,"2022":49391,"2020":89794,"2019":45201,"2015":23102,"2010":15012,"2016":29497,"2012":54125,"2017":136524,"2014":14525,"2013":20769,"2011":40682,"2008":25591,"2009":37512,"2005":10762,"2007":11951,"2006":6270,"2004":23,"2003":211,"2002":464}},{"snapshot_date":"2025-05","composition":{"2022":49089,"2023":77083,"2018":48737,"2020":89322,"2021":85760,"2025":27815,"2024":76096,"2019":45009,"2015":22923,"2012":54096,"2016":29405,"2017":136356,"2008":25546,"2009":37291,"2014":14458,"2010":14979,"2013":20628,"2011":40637,"2006":6254,"2005":10703,"2007":11866,"2004":22,"2003":211,"2002":450}},{"snapshot_date":"2025-06","composition":{"2022":49033,"2023":76925,"2021":85569,"2025":32977,"2020":89202,"2024":75818,"2018":48697,"2019":44978,"2015":22919,"2010":14972,"2016":29396,"2012":54070,"2017":136354,"2014":14454,"2013":20607,"2011":40621,"2005":10703,"2008":25546,"2007":11865,"2006":6254,"2009":37289,"2004":22,"2003":211,"2002":450}},{"snapshot_date":"2025-07","composition":{"2022":48964,"2023":76778,"2021":85458,"2025":35930,"2020":89056,"2018":48554,"2019":44888,"2024":75473,"2015":22890,"2010":14950,"2016":29238,"2012":54062,"2017":136300,"2014":14447,"2013":20592,"2011":40589,"2008":25513,"2009":37219,"2006":6254,"2005":10699,"2007":11858,"2004":22,"2003":211,"2002":450}},{"snapshot_date":"2025-08","composition":{"2021":84948,"2025":39378,"2022":48745,"2023":76487,"2020":88961,"2024":75168,"2018":48540,"2019":44846,"2015":22809,"2012":54022,"2016":29174,"2017":136261,"2010":14906,"2014":14425,"2013":20457,"2011":40518,"2008":25462,"2009":36955,"2006":6254,"2005":10699,"2007":11844,"2003":211,"2004":22,"2002":450}},{"snapshot_date":"2025-09","composition":{"2022":48340,"2023":76041,"2020":88796,"2021":81876,"2025":44888,"2024":74576,"2018":48164,"2015":22727,"2019":44345,"2010":14874,"2016":28894,"2012":53973,"2017":136220,"2014":14395,"2013":20405,"2011":40432,"2008":25414,"2009":36912,"2006":6223,"2005":10693,"2007":11841,"2004":22,"2003":211,"2002":450}},{"snapshot_date":"2025-10","composition":{"2022":48192,"2023":75293,"2021":80321,"2025":53113,"2018":47992,"2020":88588,"2024":72865,"2019":44311,"2015":22561,"2010":14854,"2016":28862,"2012":53957,"2017":136093,"2014":14375,"2013":20347,"2011":40375,"2006":6195,"2008":25189,"2005":10677,"2009":36813,"2007":11833,"2004":17,"2002":442,"2003":46}},{"snapshot_date":"2025-11","composition":{"2021":79605,"2025":59992,"2022":47943,"2023":74782,"2024":72354,"2020":88240,"2018":47831,"2015":22537,"2019":44191,"2010":14843,"2016":28752,"2012":53950,"2017":135910,"2014":14359,"2013":20274,"2011":40175,"2008":25102,"2009":36633,"2006":6059,"2005":10673,"2007":11780,"2004":17,"2002":439,"2003":46}},{"snapshot_date":"2025-12","composition":{"2021":77624,"2025":66882,"2022":47371,"2023":72787,"2018":47273,"2020":83371,"2024":70724,"2019":42674,"2015":21873,"2012":53465,"2016":28263,"2017":134788,"2010":14549,"2014":13988,"2013":19504,"2011":39948,"2008":24669,"2009":34785,"2006":4411,"2005":6078,"2007":9963,"2004":17,"2002":439,"2003":46}},{"snapshot_date":"2026-01","composition":{"2023":72372,"2025":66534,"2024":70327,"2022":47318,"2021":76902,"2018":47222,"2020":83204,"2019":42563,"2026":5958,"2015":21829,"2010":14537,"2016":28262,"2012":53457,"2017":134734,"2014":13972,"2013":19463,"2011":39886,"2006":4399,"2008":24649,"2009":34693,"2005":6067,"2007":9953,"2004":17,"2002":439,"2003":46}},{"snapshot_date":"2026-02","composition":{"2018":46641,"2022":47278,"2023":72221,"2021":76791,"2025":66399,"2020":82827,"2024":69905,"2015":21821,"2019":42547,"2026":7958,"2012":53453,"2016":28172,"2017":134717,"2008":24622,"2009":34684,"2014":13935,"2010":14529,"2013":19456,"2011":39882,"2006":4398,"2005":6064,"2007":9946,"2004":17,"2002":439,"2003":46}},{"snapshot_date":"2026-03","composition":{"2023":72001,"2025":66092,"2024":69743,"2022":47221,"2021":76469,"2018":46580,"2020":82565,"2019":42499,"2015":21647,"2026":12114,"2010":14494,"2016":28145,"2012":53430,"2017":134676,"2014":13906,"2013":19444,"2011":39765,"2008":24608,"2009":34566,"2006":4397,"2005":6057,"2007":9938,"2004":17,"2002":439,"2003":46}},{"snapshot_date":"2026-04","composition":{"2021":76438,"2025":65899,"2022":47184,"2023":71932,"2024":69683,"2018":46568,"2026":13802,"2020":82535,"2019":42499,"2015":21641,"2012":53424,"2016":28137,"2017":134665,"2006":4396,"2010":14491,"2014":13904,"2013":19432,"2011":39759,"2008":24608,"2005":6057,"2009":34553,"2007":9937,"2004":17,"2002":439,"2003":46}},{"snapshot_date":"2026-05","composition":{"2022":46949,"2023":71633,"2025":65188,"2024":69500,"2021":76067,"2018":46533,"2020":82468,"2019":42465,"2015":21593,"2026":22343,"2010":14488,"2016":28127,"2012":53406,"2017":134598,"2014":13899,"2013":19408,"2011":39360,"2008":24570,"2009":34442,"2006":4392,"2005":6057,"2007":9933,"2004":17,"2002":439,"2003":46}}],"fossils":{"genesis":{"timestamp":1008690310,"file":"scipy_distutils/command/__init__.py","content":"\"\"\"distutils.command","year":"2001","commit":"f1a2d63","view_commit":"f1a2d6376c430f65550efa235209b86c1a0967e3","line":1},"survivor":{"timestamp":1017446578,"file":"numpy/lib/_polynomial_impl.py","content":"def poly(seq_of_zeros):","year":"2002","commit":"0562713","view_commit":"main","line":40}}} \ No newline at end of file diff --git a/data/react_data.json b/data/react_data.json deleted file mode 100644 index 0754501..0000000 --- a/data/react_data.json +++ /dev/null @@ -1 +0,0 @@ -{"snapshots":[{"snapshot_date":"2013-06","composition":{"2013":44058}},{"snapshot_date":"2013-09","composition":{"2013":56640}},{"snapshot_date":"2013-12","composition":{"2013":103418}},{"snapshot_date":"2014-03","composition":{"2014":14460,"2013":124672}},{"snapshot_date":"2014-06","composition":{"2013":122748,"2014":28040}},{"snapshot_date":"2014-09","composition":{"2013":120753,"2014":43942}},{"snapshot_date":"2014-12","composition":{"2013":115042,"2014":61057}},{"snapshot_date":"2015-03","composition":{"2015":12814,"2013":111731,"2014":68309}},{"snapshot_date":"2015-06","composition":{"2015":99379,"2014":64525,"2013":108918}},{"snapshot_date":"2015-09","composition":{"2015":124048,"2014":61377,"2013":99751}},{"snapshot_date":"2015-12","composition":{"2013":97720,"2015":146057,"2014":66057}},{"snapshot_date":"2016-03","composition":{"2015":152293,"2016":8454,"2014":65616,"2013":97372}},{"snapshot_date":"2016-06","composition":{"2013":92612,"2015":145240,"2014":58111,"2016":56315}},{"snapshot_date":"2016-09","composition":{"2016":97634,"2014":56984,"2015":128249,"2013":91397}},{"snapshot_date":"2016-12","composition":{"2013":87455,"2015":93943,"2014":53615,"2016":165406}},{"snapshot_date":"2017-03","composition":{"2016":166245,"2017":18369,"2014":53388,"2015":93195,"2013":87174}},{"snapshot_date":"2017-06","composition":{"2016":114802,"2017":97714,"2014":49195,"2013":80876,"2015":85734}},{"snapshot_date":"2017-09","composition":{"2016":100286,"2017":137649,"2014":44904,"2013":73677,"2015":81253}},{"snapshot_date":"2017-12","composition":{"2014":3296,"2016":37649,"2017":130122,"2015":9732,"2013":4642}},{"snapshot_date":"2018-03","composition":{"2017":133361,"2016":31594,"2018":25647,"2015":8897,"2013":4240,"2014":2846}},{"snapshot_date":"2018-06","composition":{"2013":4178,"2015":8798,"2014":2813,"2016":28955,"2017":125888,"2018":47644}},{"snapshot_date":"2018-09","composition":{"2017":123086,"2015":8701,"2016":27872,"2018":80461,"2014":2783,"2013":4085}},{"snapshot_date":"2018-12","composition":{"2017":119475,"2016":27687,"2013":4028,"2018":109144,"2015":8663,"2014":2745}},{"snapshot_date":"2019-03","composition":{"2018":105961,"2014":2745,"2017":119065,"2015":8661,"2016":27318,"2013":4026,"2019":17887}},{"snapshot_date":"2019-06","composition":{"2019":47716}},{"snapshot_date":"2019-09","composition":{"2019":144487,"2013":4006,"2015":8643,"2014":2733,"2017":116957,"2018":85535,"2016":26456}},{"snapshot_date":"2019-12","composition":{"2015":8633,"2017":115880,"2016":26323,"2014":2731,"2013":4006,"2019":173378,"2018":83042}},{"snapshot_date":"2020-03","composition":{"2014":2628,"2020":25299,"2019":161223,"2018":77097,"2016":25834,"2017":113668,"2013":3813,"2015":8533}},{"snapshot_date":"2020-06","composition":{"2020":107514,"2017":111700,"2019":151449,"2018":74527,"2013":3705,"2015":8475,"2016":24830,"2014":2635}},{"snapshot_date":"2020-09","composition":{"2019":134359,"2016":24617,"2018":71821,"2020":130649,"2017":109402,"2015":8375,"2014":2117,"2013":3432}},{"snapshot_date":"2020-12","composition":{"2015":8365,"2014":2109,"2016":24570,"2013":3381,"2019":132051,"2020":161173,"2017":109318,"2018":71602}},{"snapshot_date":"2021-03","composition":{"2017":106680,"2019":123247,"2021":14784,"2014":2109,"2018":71013,"2020":174962,"2013":3381,"2015":8365,"2016":24563}},{"snapshot_date":"2021-06","composition":{"2013":3366,"2018":64817,"2020":164228,"2021":51873,"2017":104862,"2015":8362,"2019":117274,"2014":2097,"2016":24443}},{"snapshot_date":"2021-09","composition":{"2019":114891,"2021":100659,"2020":157591,"2015":8299,"2014":2055,"2016":24342,"2013":3366,"2017":104459,"2018":64231}},{"snapshot_date":"2021-12","composition":{"2019":113210,"2021":132852,"2015":8119,"2017":103777,"2016":24244,"2014":2047,"2013":3347,"2020":151036,"2018":63731}},{"snapshot_date":"2022-03","composition":{"2020":149490,"2021":132083,"2019":111676,"2017":103610,"2022":20641,"2018":63553,"2015":8093,"2016":24233,"2013":3347,"2014":2036}},{"snapshot_date":"2022-06","composition":{"2021":128680,"2020":146266,"2014":2036,"2013":3343,"2015":8081,"2019":105978,"2017":103567,"2022":42376,"2018":62371,"2016":24217}},{"snapshot_date":"2022-09","composition":{"2021":125186,"2017":102835,"2019":101050,"2022":65451,"2020":141043,"2018":61418,"2015":8069,"2014":2036,"2016":24214,"2013":3343}},{"snapshot_date":"2022-12","composition":{"2019":97224,"2021":122925,"2020":136881,"2017":101550,"2015":8038,"2016":24156,"2018":59931,"2022":99388,"2013":3308,"2014":2030}},{"snapshot_date":"2023-03","composition":{"2019":95301,"2021":113457,"2022":78827,"2023":25935,"2020":102538,"2017":99498,"2015":7993,"2016":23711,"2013":3258,"2014":2005,"2018":58919}},{"snapshot_date":"2023-06","composition":{"2022":17170,"2021":61003,"2023":45427}},{"snapshot_date":"2023-09","composition":{"2023":278928,"2021":55707,"2022":9406}},{"snapshot_date":"2023-12","composition":{"2023":315798,"2021":58578,"2022":10022}},{"snapshot_date":"2024-03","composition":{"2021":74640}},{"snapshot_date":"2024-06","composition":{"2024":131231,"2023":389106,"2022":70231,"2019":76856,"2020":86835,"2021":110365,"2013":2827,"2015":6468,"2014":1438,"2017":91522,"2018":51463,"2016":21206}},{"snapshot_date":"2024-09","composition":{"2024":186038,"2020":85181,"2017":91329,"2015":6450,"2016":21094,"2019":74478,"2021":107827,"2014":1434,"2023":376610,"2013":2827,"2018":50963,"2022":67418}},{"snapshot_date":"2024-12","composition":{"2024":222258,"2023":371161,"2018":50452,"2021":106543,"2017":91147,"2019":73494,"2022":65464,"2013":2798,"2016":21016,"2015":6434,"2020":84332,"2014":1428}},{"snapshot_date":"2025-01","composition":{"2019":72870,"2024":235398,"2023":369720,"2020":83431,"2021":105680,"2022":64957,"2013":2798,"2018":49935,"2017":91080,"2015":6409,"2016":21010,"2014":1424,"2025":77}},{"snapshot_date":"2025-02","composition":{"2021":105294,"2019":72002,"2020":81905,"2013":2794,"2015":6344,"2014":1412,"2024":228979,"2017":90754,"2022":64653,"2023":146990,"2016":20945,"2018":48938,"2025":30519}},{"snapshot_date":"2025-03","composition":{"2024":226806,"2020":81740,"2025":43402,"2017":90722,"2015":6341,"2016":20945,"2019":70686,"2021":105234,"2023":145909,"2014":1412,"2013":2793,"2022":64519,"2018":48842}},{"snapshot_date":"2025-04","composition":{"2023":144980,"2021":105085,"2024":221556,"2025":76221,"2019":70526,"2020":81491,"2017":90713,"2022":63742,"2014":1412,"2013":2793,"2016":20943,"2015":6341,"2018":48764}},{"snapshot_date":"2025-05","composition":{"2024":220417,"2013":2793,"2016":20943,"2022":63508,"2015":6341,"2023":143478,"2018":48758,"2017":90707,"2019":70454,"2020":81428,"2014":1412,"2021":103829,"2025":95247}},{"snapshot_date":"2025-06","composition":{"2024":218219,"2025":112502,"2023":142802,"2021":103776,"2020":81289,"2013":2793,"2018":48737,"2017":90702,"2015":6341,"2019":70363,"2022":63378,"2016":20942,"2014":1412}},{"snapshot_date":"2025-07","composition":{"2024":215841,"2025":137355,"2019":70222,"2021":103736,"2023":142387,"2020":81261,"2017":90701,"2015":6341,"2016":20939,"2014":1412,"2013":2793,"2018":48694,"2022":63159}},{"snapshot_date":"2025-08","composition":{"2024":212777,"2020":81060,"2025":153793,"2019":69618,"2021":103392,"2023":141860,"2014":1412,"2017":90690,"2015":6334,"2016":20929,"2013":2793,"2018":48331,"2022":62740}},{"snapshot_date":"2025-09","composition":{"2024":209350,"2020":80847,"2025":185175,"2019":69500,"2021":103253,"2023":140410,"2013":2793,"2015":6334,"2014":1412,"2017":90685,"2016":20929,"2018":48318,"2022":62013}},{"snapshot_date":"2025-10","composition":{"2017":90684,"2015":6334,"2016":20929,"2019":69355,"2024":208263,"2021":103074,"2020":80758,"2025":199752,"2023":139982,"2014":1412,"2018":48312,"2022":61753,"2013":2793}},{"snapshot_date":"2025-11","composition":{"2024":207430,"2025":209692,"2019":69272,"2021":103036,"2020":80677,"2023":139651,"2014":1412,"2017":90684,"2015":6334,"2016":20929,"2018":48310,"2022":61693,"2013":2793}},{"snapshot_date":"2025-12","composition":{"2020":80599,"2024":205774,"2025":220530,"2021":103015,"2023":136868,"2019":69017,"2014":1412,"2013":2793,"2015":6334,"2017":90652,"2016":20915,"2018":48277,"2022":61679}},{"snapshot_date":"2026-01","composition":{"2026":19721,"2024":203679,"2020":80546,"2025":223578,"2014":1412,"2019":68844,"2021":102929,"2013":2793,"2015":4433,"2023":136219,"2016":20907,"2018":48198,"2022":61557,"2017":90634}},{"snapshot_date":"2026-02","composition":{"2026":33082,"2024":195521,"2025":215585,"2019":68808,"2021":102919,"2020":80409,"2013":2792,"2015":4433,"2014":1412,"2023":134343,"2017":90634,"2016":20907,"2018":48198,"2022":61380}},{"snapshot_date":"2026-03","composition":{"2026":38141,"2024":195277,"2025":215136,"2021":102784,"2019":68486,"2020":80280,"2017":90630,"2015":4433,"2016":20907,"2018":48092,"2022":61321,"2023":134227,"2013":2792,"2014":1412}},{"snapshot_date":"2026-04","composition":{"2026":42267,"2024":195277,"2025":215136,"2019":68486,"2021":102784,"2020":80280,"2016":20907,"2018":48092,"2022":61321,"2017":90630,"2023":134227,"2014":1412,"2015":4433,"2013":2792}},{"snapshot_date":"2026-05","composition":{"2026":45370,"2024":194885,"2025":214779,"2019":68003,"2021":102511,"2020":79804,"2017":89469,"2015":4433,"2016":20263,"2013":2792,"2014":1412,"2018":47405,"2022":61254,"2023":133996}}],"fossils":{"genesis":{"timestamp":1369760685,"file":"vendor/jasmine/jasmine-support.js","content":"return _it.call(this, desc, func);","year":"2013","commit":"c740373","view_commit":"c740373b311a2aa43a512f1bf53e1de72635c02a","line":40},"survivor":{"timestamp":1369856771,"file":".editorconfig","content":"root = true","year":"2013","commit":"75897c2","view_commit":"main","line":2}}} \ No newline at end of file diff --git a/data/zed_data.json b/data/zed_data.json deleted file mode 100644 index 46ed98a..0000000 --- a/data/zed_data.json +++ /dev/null @@ -1 +0,0 @@ -{"snapshots":[{"snapshot_date":"2021-03","composition":{"2021":25386}},{"snapshot_date":"2021-06","composition":{"2021":44895}},{"snapshot_date":"2021-09","composition":{"2021":76965}},{"snapshot_date":"2021-12","composition":{"2021":93984}},{"snapshot_date":"2022-03","composition":{"2021":73677,"2022":387601}},{"snapshot_date":"2022-06","composition":{"2021":51110,"2022":438719}},{"snapshot_date":"2022-09","composition":{"2021":47921,"2022":461332}},{"snapshot_date":"2022-12","composition":{"2022":491040,"2021":42941}},{"snapshot_date":"2023-03","composition":{"2022":475618,"2023":50000,"2021":39796}},{"snapshot_date":"2023-06","composition":{"2023":113724,"2022":453195,"2021":36084}},{"snapshot_date":"2023-09","composition":{"2022":444145,"2023":201662,"2021":35314}},{"snapshot_date":"2023-12","composition":{"2023":568521,"2022":439153,"2021":34913}},{"snapshot_date":"2024-03","composition":{"2024":190923,"2023":147169,"2022":410686,"2021":22875}},{"snapshot_date":"2024-06","composition":{"2024":309630,"2023":130635,"2022":66647,"2021":21876}},{"snapshot_date":"2024-09","composition":{"2024":441349,"2023":116356,"2021":20941,"2022":61998}},{"snapshot_date":"2024-12","composition":{"2024":522156,"2023":111931,"2022":58843,"2021":19618}},{"snapshot_date":"2025-01","composition":{"2025":87396,"2024":472990,"2023":106552,"2022":56478,"2021":19030}},{"snapshot_date":"2025-02","composition":{"2024":445662,"2025":136662,"2023":104622,"2022":55697,"2021":18947}},{"snapshot_date":"2025-03","composition":{"2025":232121,"2024":420096,"2023":100247,"2022":53002,"2021":18700}},{"snapshot_date":"2025-04","composition":{"2025":295774,"2024":405671,"2023":98891,"2021":18379,"2022":51571}},{"snapshot_date":"2025-05","composition":{"2025":383630,"2024":388978,"2023":97949,"2022":51077,"2021":18342}},{"snapshot_date":"2025-06","composition":{"2025":435520,"2024":381859,"2023":96906,"2022":50493,"2021":18310}},{"snapshot_date":"2025-07","composition":{"2024":370918,"2025":505687,"2023":96078,"2022":50134,"2021":18028}},{"snapshot_date":"2025-08","composition":{"2025":587332,"2024":342164,"2023":94185,"2022":49275,"2021":17791}},{"snapshot_date":"2025-09","composition":{"2025":647281,"2024":322881,"2023":89353,"2022":48437,"2021":17312}},{"snapshot_date":"2025-10","composition":{"2025":785450,"2024":313650,"2023":87163,"2022":47531,"2021":16776}},{"snapshot_date":"2025-11","composition":{"2025":829130,"2024":308220,"2023":85858,"2022":46995,"2021":16515}},{"snapshot_date":"2025-12","composition":{"2025":878519,"2024":303362,"2023":84983,"2022":46559,"2021":16345}},{"snapshot_date":"2026-01","composition":{"2025":827244,"2026":155517,"2024":296030,"2023":82991,"2022":46183,"2021":16051}},{"snapshot_date":"2026-02","composition":{"2026":287230,"2024":286996,"2023":81761,"2025":792483,"2022":44769,"2021":15713}},{"snapshot_date":"2026-03","composition":{"2026":424322,"2025":759181,"2023":80738,"2024":271972,"2022":44357,"2021":15498}},{"snapshot_date":"2026-04","composition":{"2026":449429,"2024":270386,"2025":753702,"2023":80586,"2022":43769,"2021":15325}},{"snapshot_date":"2026-05","composition":{"2025":738034,"2024":267615,"2026":537143,"2023":78970,"2022":43488,"2021":15261}}],"fossils":{"genesis":{"timestamp":1613840554,"file":"Cargo.toml","content":"[workspace]","year":"2021","commit":"b400449","view_commit":"b400449a58507cca1fa007197929c2cfd6beabbe","line":1},"survivor":{"timestamp":1613840554,"file":"Cargo.toml","content":"[workspace]","year":"2021","commit":"b400449","view_commit":"main","line":1}}} \ No newline at end of file diff --git a/scripts/__init__.py b/scripts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/_utils.py b/scripts/_utils.py new file mode 100644 index 0000000..115d1b7 --- /dev/null +++ b/scripts/_utils.py @@ -0,0 +1,84 @@ +""" +Shared utilities for Theseus data pipeline scripts. + +Consolidates helpers that were previously duplicated across +``analyse_repository.py`` and ``add_fossils.py``: + +* ``run_command`` — safe subprocess wrapper with utf-8 handling +* ``get_default_branch`` — determine a repo's default git branch +""" + +import subprocess +import logging + +logger = logging.getLogger(__name__) + + +def run_command(cmd: list[str], cwd: str | None = None) -> str: + """ + Execute a shell command and return its standard output. + + :param cmd: List of arguments forming the command. + :param cwd: Directory path where the command should be executed. + :return: Decoded standard output of the command, stripped. + """ + try: + result = subprocess.run( + cmd, + cwd=str(cwd) if cwd else None, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + encoding="utf-8", + errors="replace", + check=True, + ) + return result.stdout.strip() + except subprocess.CalledProcessError as e: + raise RuntimeError( + f"Command failed: {' '.join(str(c) for c in cmd)} " + f"(exit {e.returncode}) — {e.stderr.strip()}" + ) from e + + +def get_default_branch(repo_path: str | None = None) -> str: + """ + Determine the default branch name for a git repository. + + Tries, in order: + + 1. ``git symbolic-ref --short refs/remotes/origin/HEAD`` + 2. ``git rev-parse --abbrev-ref origin/HEAD`` + 3. ``git rev-parse --verify origin/main`` + 4. ``git rev-parse --verify origin/master`` + 5. ``git rev-parse --verify origin/develop`` + 6. Falls back to ``"HEAD"`` + + :param repo_path: Path to the git repository (or ``None`` for CWD). + :return: Default branch name (e.g. ``"main"``, ``"master"``). + """ + for strategy in [ + ["git", "symbolic-ref", "--short", "refs/remotes/origin/HEAD"], + ["git", "rev-parse", "--abbrev-ref", "origin/HEAD"], + ]: + try: + result = run_command(strategy, cwd=repo_path) + branch = ( + result[len("origin/"):] if result.startswith("origin/") else result + ) + if branch: + return branch + except RuntimeError: + continue + + for branch in ("main", "master", "develop"): + try: + run_command( + ["git", "rev-parse", "--verify", f"origin/{branch}"], + cwd=repo_path, + ) + return branch + except RuntimeError: + continue + + return "HEAD" diff --git a/scripts/add_fossils.py b/scripts/add_fossils.py index 98209fd..2a8ea14 100644 --- a/scripts/add_fossils.py +++ b/scripts/add_fossils.py @@ -23,11 +23,17 @@ import json import logging import os -import subprocess import sys from datetime import datetime, timezone from pathlib import Path +# Ensure sibling imports from _utils work in all invocation contexts +_SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__)) +if _SCRIPTS_DIR not in sys.path: + sys.path.insert(0, _SCRIPTS_DIR) + +from _utils import get_default_branch, run_command + logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") logger = logging.getLogger(__name__) @@ -37,24 +43,6 @@ # --------------------------------------------------------------------------- -def _run_command(cmd, cwd=None): - try: - result = subprocess.run( - cmd, - cwd=str(cwd) if cwd else None, - capture_output=True, - text=True, - check=True, - encoding="utf-8", - errors="replace", - ) - return result.stdout.strip() - except subprocess.CalledProcessError as e: - raise RuntimeError( - f"Command failed: {' '.join(str(c) for c in cmd)} — {e.stderr}" - ) from e - - def _blank_fossil(): return { "timestamp": 2_147_483_647, @@ -70,7 +58,7 @@ def _blank_fossil(): def _blame_file(repo_path, file_path, view_commit=""): """Run git blame --line-porcelain on a single file and return the oldest fossil found.""" try: - blame_output = _run_command( + blame_output = run_command( ["git", "blame", "--line-porcelain", file_path], cwd=repo_path, ) @@ -86,7 +74,7 @@ def _blame_file(repo_path, file_path, view_commit=""): line_num += 1 timestamp = current_commit_data.get("author-time") content = line.lstrip("\t").strip() - if timestamp and timestamp < fossil["timestamp"] and content: + if timestamp is not None and timestamp < fossil["timestamp"] and content: fossil["timestamp"] = timestamp fossil["file"] = file_path fossil["content"] = content @@ -129,7 +117,7 @@ def _blame_files_parallel(repo_path, files, view_commit="", max_workers=20): def _get_tracked_files(repo_path): """Return a list of files that are tracked by git and exist on disk.""" - files_output = _run_command(["git", "ls-files"], cwd=repo_path) + files_output = run_command(["git", "ls-files"], cwd=repo_path) return [ f for f in files_output.splitlines() @@ -137,41 +125,68 @@ def _get_tracked_files(repo_path): ] -def _get_default_branch(repo_path): - """Figure out the default branch name (main vs master vs something else).""" - # Try the symref approach first (works with a full clone) - for strategy in [ - ["git", "symbolic-ref", "--short", "refs/remotes/origin/HEAD"], - ["git", "rev-parse", "--abbrev-ref", "origin/HEAD"], - ]: - try: - result = _run_command(strategy, cwd=repo_path) - # Strip the "origin/" prefix if present without collapsing slashes - branch = result[len("origin/") :] if result.startswith("origin/") else result - if branch: - return branch - except RuntimeError: - continue - - # Fall back to checking which of the usual suspects exists - for branch in ("main", "master", "develop"): - try: - _run_command( - ["git", "rev-parse", "--verify", f"origin/{branch}"], cwd=repo_path - ) - return branch - except RuntimeError: - continue - - return "HEAD" +def _get_files_added_in_commit(repo_path, commit_hash): + """ + Return files that were *added* (not modified, not renamed) by this commit. + + Uses ``git diff-tree --diff-filter=A`` which only lists new files + introduced in the commit, compared to its parent(s). For the root + commit (no parent) the command fails so we fall back to ``git ls-files``. + + Complexity + ---------- + Before (``_get_tracked_files``): + O(all_tracked_files) per commit — every file at that checkout is + blamed, even files that were added centuries earlier. + + After (``_get_files_added_in_commit``): + O(added_files_only) per commit — only files that first appear in + this commit are blamed. Files from older commits were already + handled in earlier iterations of the genesis loop, so re-blaming + them is redundant. + + Why this is safe + ---------------- + ``git blame --line-porcelain`` traces each line back to the commit + that *last modified* that line. If a file was added at commit K and + never touched again, blaming it at K or at any later commit returns + the same author-time == K. If a file was added at K and modified at + K+2, the modified lines will show author-time == K+2, which is never + older than K. Therefore the oldest line of any file is found by + blaming that file exactly once — at the commit where it first + appeared in the tree. + """ + try: + # For non-root commits — compare against parent(s) + files_output = run_command( + [ + "git", + "diff-tree", + "--no-commit-id", + "-r", + "--diff-filter=A", + "--name-only", + commit_hash, + ], + cwd=repo_path, + ) + return files_output.splitlines() if files_output else [] + except RuntimeError: + # Root commit has no parent — all tracked files are "new" + files_output = run_command(["git", "ls-files"], cwd=repo_path) + return files_output.splitlines() def _fossil_identity(fossil: dict) -> tuple: """Return a hashable key that identifies which line this fossil refers to. - We use file + line-number + blame commit (the actual authoring commit). - This detects when the living fossil moves to a different line or file. + + Uses (file, blame_commit) — the authoring commit uniquely identifies the + content. Line numbers are intentionally excluded: a line that stays in + the same file but shifts position (due to insertions/deletions above it) + is still the same fossil. Only a change in file or authoring commit + (meaning the line was actually rewritten) counts as a different fossil. """ - return (fossil.get("file", ""), fossil.get("line", 0), fossil.get("commit", "")) + return (fossil.get("file", ""), fossil.get("commit", "")) # --------------------------------------------------------------------------- @@ -191,7 +206,7 @@ def get_genesis_fossil(repo_path, genesis_depth=50): logger.info("Computing Genesis (Historical) fossil...") # Get every commit with its author-time so we can sort by actual authorship date - log_output = _run_command( + log_output = run_command( ["git", "log", "--all", "--pretty=format:%H %at"], cwd=repo_path, ) @@ -224,12 +239,16 @@ def get_genesis_fossil(repo_path, genesis_depth=50): author_ts, ) try: - _run_command(["git", "checkout", "--force", commit], cwd=repo_path) + run_command(["git", "checkout", "--force", commit], cwd=repo_path) except RuntimeError as e: logger.warning(" Could not checkout %s: %s", commit[:7], e) continue - files = _get_tracked_files(repo_path) + # Only blame files that were *added* in this commit, not every + # tracked file. Files added in older commits have already been + # blamed in previous loop iterations — re-blaming them is wasted work. + # See _get_files_added_in_commit for the full reasoning. + files = _get_files_added_in_commit(repo_path, commit) if not files: continue @@ -254,17 +273,17 @@ def get_survivor_fossil(repo_path): """ logger.info("Computing Survivor (Living) fossil...") - default_branch = _get_default_branch(repo_path) + default_branch = get_default_branch(repo_path) logger.info(" Checking out default branch: %s", default_branch) try: - _run_command( + run_command( ["git", "checkout", "-B", default_branch, f"origin/{default_branch}"], cwd=repo_path, ) except RuntimeError: # Detached HEAD fallback - _run_command( + run_command( ["git", "checkout", "--force", f"origin/{default_branch}"], cwd=repo_path ) @@ -326,11 +345,11 @@ def backfill_fossils(data_dir, repo_urls): local_repo = temp_dir / repo_name if not local_repo.exists(): logger.info(" Cloning %s...", repo_url) - _run_command(["git", "clone", repo_url, str(local_repo)]) + run_command(["git", "clone", repo_url, str(local_repo)]) else: logger.info(" Repo already cloned — fetching latest...") try: - _run_command(["git", "fetch", "--all"], cwd=local_repo) + run_command(["git", "fetch", "--all"], cwd=local_repo) except RuntimeError as e: logger.warning(" Fetch failed (continuing with local): %s", e) @@ -445,11 +464,11 @@ def update_survivor_fossils(data_dir, repo_urls): local_repo = temp_dir / repo_name if not local_repo.exists(): logger.info(" Cloning %s...", repo_url) - _run_command(["git", "clone", repo_url, str(local_repo)]) + run_command(["git", "clone", repo_url, str(local_repo)]) else: logger.info(" Fetching latest...") try: - _run_command(["git", "fetch", "--all"], cwd=local_repo) + run_command(["git", "fetch", "--all"], cwd=local_repo) except RuntimeError as e: logger.warning(" Fetch failed (continuing with local): %s", e) diff --git a/scripts/analyse_repository.py b/scripts/analyse_repository.py index 5f6ac07..5ecb513 100644 --- a/scripts/analyse_repository.py +++ b/scripts/analyse_repository.py @@ -12,40 +12,20 @@ import os import shutil import stat -import subprocess import sys import time from collections import defaultdict from datetime import datetime, timezone from itertools import groupby -logger = logging.getLogger(__name__) - +# Ensure sibling imports from _utils work in all invocation contexts +_SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__)) +if _SCRIPTS_DIR not in sys.path: + sys.path.insert(0, _SCRIPTS_DIR) -def _run_command(cmd: list[str], cwd: str | None = None) -> str: - """ - Execute a shell command and return its standard output +from _utils import run_command, get_default_branch - :param cmd: List of arguments forming the command. - :param cwd: Directory path where the command should be executed. - :return: Decoded standard output of the command. - """ - try: - result = subprocess.run( - cmd, - cwd=cwd, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - encoding="utf-8", - errors="replace", - check=True, - ) - return result.stdout.strip() - except subprocess.CalledProcessError as e: - raise RuntimeError( - f"Command '{' '.join(cmd)}' failed with exit code {e.returncode}" - ) from e +logger = logging.getLogger(__name__) def clone_repository(repo_slug: str, clone_dir: str) -> None: @@ -57,7 +37,7 @@ def clone_repository(repo_slug: str, clone_dir: str) -> None: """ logger.info("Cloning %s into %s...", repo_slug, clone_dir) repo_url = f"https://github.com/{repo_slug}.git" - _run_command(["git", "clone", repo_url, clone_dir]) + run_command(["git", "clone", repo_url, clone_dir]) def get_snapshots(repo_path: str) -> list[tuple[str, str]]: @@ -69,7 +49,7 @@ def get_snapshots(repo_path: str) -> list[tuple[str, str]]: :param repo_path: Path to the git repository. :return: A list of tuples, each containing a 'YYYY-MM' period and the corresponding commit hash. """ - log_output = _run_command( + log_output = run_command( cmd=["git", "log", "--pretty=format:%H|%cI"], cwd=repo_path ) @@ -137,7 +117,7 @@ def _blame_single_file(repo_path: str, file: str) -> dict[str, int]: Designed to be run concurrently in a ThreadPool. """ try: - blame_output = _run_command( + blame_output = run_command( ["git", "blame", "--line-porcelain", file], cwd=repo_path ) return _parse_blame_output(blame_output) @@ -153,8 +133,8 @@ def analyze_snapshots(repo_path: str, commit_hash: str) -> dict[str, int]: :param commit_hash: Hash of the commit to analyze :return: Dictionary mapping birth year to line count """ - _run_command(["git", "checkout", commit_hash], cwd=repo_path) - files_output = _run_command(["git", "ls-files"], cwd=repo_path) + run_command(["git", "checkout", commit_hash], cwd=repo_path) + files_output = run_command(["git", "ls-files"], cwd=repo_path) files = files_output.splitlines() age_distribution = defaultdict(int) @@ -239,14 +219,18 @@ def process_repository(repo_slug: str, data_dir: str) -> None: logger.info( "Repository %s already exists locally. Fetching latest...", repo_name ) - _run_command(["git", "fetch", "--all"], cwd=temp_repo_path) - for branch in ["main", "master"]: - try: - _run_command(["git", "checkout", branch], cwd=temp_repo_path) - break - except RuntimeError: - continue - _run_command(["git", "pull"], cwd=temp_repo_path) + run_command(["git", "fetch", "--all"], cwd=temp_repo_path) + default_branch = get_default_branch(temp_repo_path) + if default_branch == "HEAD": + raise RuntimeError( + f"[{repo_name}] Cannot determine default branch after fetch. " + "Tried: main, master, develop, origin/HEAD." + ) + run_command( + ["git", "checkout", "-B", default_branch, f"origin/{default_branch}"], + cwd=temp_repo_path, + ) + run_command(["git", "pull"], cwd=temp_repo_path) state = load_existing_state(output_json_path) historical_snapshots = state["snapshots"] From 708291fdd09354e291d26ad9fd38b51bb93011a3 Mon Sep 17 00:00:00 2001 From: Asif Sayyed Date: Fri, 29 May 2026 23:44:39 +0530 Subject: [PATCH 2/9] #33 fixed some script concerns --- scripts/_utils.py | 24 ++++++++- scripts/add_fossils.py | 97 ++++++++++++++++++++++++++--------- scripts/analyse_repository.py | 18 +++---- scripts/cleanup_data.py | 20 ++++---- 4 files changed, 113 insertions(+), 46 deletions(-) diff --git a/scripts/_utils.py b/scripts/_utils.py index 115d1b7..3fe3266 100644 --- a/scripts/_utils.py +++ b/scripts/_utils.py @@ -8,8 +8,10 @@ * ``get_default_branch`` — determine a repo's default git branch """ -import subprocess +import json import logging +import subprocess +import sys logger = logging.getLogger(__name__) @@ -41,6 +43,26 @@ def run_command(cmd: list[str], cwd: str | None = None) -> str: ) from e +def load_config(config_path: str = "theseus.config.json") -> dict: + """ + Load and return the project configuration file (``theseus.config.json``). + + Exits with status 1 if the file is missing or malformed. + + :param config_path: Path to the JSON configuration file. + :return: Parsed configuration dictionary. + """ + try: + with open(config_path, "r", encoding="utf-8") as f: + return json.load(f) + except FileNotFoundError: + logger.error("Configuration file not found: %s", config_path) + sys.exit(1) + except json.JSONDecodeError as e: + logger.error("Configuration file %s is malformed: %s", config_path, e) + sys.exit(1) + + def get_default_branch(repo_path: str | None = None) -> str: """ Determine the default branch name for a git repository. diff --git a/scripts/add_fossils.py b/scripts/add_fossils.py index 2a8ea14..b99f866 100644 --- a/scripts/add_fossils.py +++ b/scripts/add_fossils.py @@ -32,7 +32,7 @@ if _SCRIPTS_DIR not in sys.path: sys.path.insert(0, _SCRIPTS_DIR) -from _utils import get_default_branch, run_command +from _utils import get_default_branch, run_command, load_config logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") logger = logging.getLogger(__name__) @@ -43,7 +43,7 @@ # --------------------------------------------------------------------------- -def _blank_fossil(): +def _blank_fossil() -> dict: return { "timestamp": 2_147_483_647, "file": "", @@ -55,7 +55,7 @@ def _blank_fossil(): } -def _blame_file(repo_path, file_path, view_commit=""): +def _blame_file(repo_path: str | Path, file_path: str, view_commit: str = "") -> dict: """Run git blame --line-porcelain on a single file and return the oldest fossil found.""" try: blame_output = run_command( @@ -99,7 +99,9 @@ def _blame_file(repo_path, file_path, view_commit=""): return fossil -def _blame_files_parallel(repo_path, files, view_commit="", max_workers=20): +def _blame_files_parallel( + repo_path: str | Path, files: list[str], view_commit: str = "", max_workers: int = 20 +) -> dict: """Blame a list of files in parallel and return the single oldest fossil found.""" global_oldest = _blank_fossil() @@ -115,7 +117,7 @@ def _blame_files_parallel(repo_path, files, view_commit="", max_workers=20): return global_oldest -def _get_tracked_files(repo_path): +def _get_tracked_files(repo_path: str | Path) -> list[str]: """Return a list of files that are tracked by git and exist on disk.""" files_output = run_command(["git", "ls-files"], cwd=repo_path) return [ @@ -125,7 +127,7 @@ def _get_tracked_files(repo_path): ] -def _get_files_added_in_commit(repo_path, commit_hash): +def _get_files_added_in_commit(repo_path: str | Path, commit_hash: str) -> list[str]: """ Return files that were *added* (not modified, not renamed) by this commit. @@ -194,14 +196,46 @@ def _fossil_identity(fossil: dict) -> tuple: # --------------------------------------------------------------------------- -def get_genesis_fossil(repo_path, genesis_depth=50): +def get_genesis_fossil( + repo_path: str | Path, + genesis_depth: int = 50, + stale_limit: int = 5, +) -> dict: """ Historical Fossil: the oldest line **ever authored** in this repo. - Strategy: Sort ALL commits by author-time (not committer-time), take the - oldest genesis_depth ones, and blame them. This correctly handles repos - migrated from SVN/Mercurial where old authored lines may appear in commits - with much later committer timestamps. + Strategy + -------- + Sort ALL commits by author-time (not committer-time), then scan the oldest + ``genesis_depth`` commits. This correctly handles repos migrated from + SVN/Mercurial where old authored lines may appear in commits with much + later committer timestamps. + + Early-exit heuristic + ~~~~~~~~~~~~~~~~~~~~ + Once a fossil has been found, if ``stale_limit`` consecutive older commits + fail to improve it (no line with a smaller author-time), the scan stops. + The assumption is that if a long stretch of early commits doesn't contain + anything older than what we already have, no older line exists anywhere. + + Why this is safe + ~~~~~~~~~~~~~~~~ + The very first commit (lowest author-time) is always scanned first. If the + oldest code was added in one of the earliest commits, it will be found + immediately. The stale-limit window (default 5) gives enough room for + repos where the first commit only contained a README and the real code was + added in a slightly later commit, while stopping *well* before 50 in the + common case. + + Before (hardcoded 50) + Worst case: 50 blame passes over the full file tree at each commit. + Even with the ``_get_files_added_in_commit`` optimisation, scanning 50 + commits is unnecessary for most repos. + + After (adaptive stale-limit + hard cap) + Most repos stop after 5--10 commits. Edge cases (e.g. a repo with + many distinct old commits that each add new source files) still have + the hard safety cap of ``genesis_depth=50``. """ logger.info("Computing Genesis (Historical) fossil...") @@ -211,7 +245,7 @@ def get_genesis_fossil(repo_path, genesis_depth=50): cwd=repo_path, ) - commit_pairs = [] + commit_pairs: list[tuple[str, int]] = [] for line in log_output.splitlines(): parts = line.strip().split(" ", 1) if len(parts) == 2: @@ -229,6 +263,7 @@ def get_genesis_fossil(repo_path, genesis_depth=50): oldest_commits = [(c[0], c[1]) for c in commit_pairs[:genesis_depth]] global_oldest = _blank_fossil() + stale_count = 0 for i, (commit, author_ts) in enumerate(oldest_commits): logger.info( @@ -250,12 +285,33 @@ def get_genesis_fossil(repo_path, genesis_depth=50): # See _get_files_added_in_commit for the full reasoning. files = _get_files_added_in_commit(repo_path, commit) if not files: + stale_count += 1 + if stale_count >= stale_limit: + logger.info( + " Stopping early after %d commits (%d consecutive with no new " + "files to blame).", + i + 1, + stale_limit, + ) + break continue fossil = _blame_files_parallel(repo_path, files, view_commit=commit) if fossil["file"] and fossil["timestamp"] < global_oldest["timestamp"]: global_oldest = fossil + stale_count = 0 + else: + stale_count += 1 + + if stale_count >= stale_limit: + logger.info( + " Stopping early after %d commits (%d consecutive without " + "improvement).", + i + 1, + stale_limit, + ) + break return global_oldest @@ -265,7 +321,7 @@ def get_genesis_fossil(repo_path, genesis_depth=50): # --------------------------------------------------------------------------- -def get_survivor_fossil(repo_path): +def get_survivor_fossil(repo_path: str | Path) -> dict: """ Living Fossil: the oldest line that is **still alive** in the codebase today. @@ -305,7 +361,7 @@ def get_survivor_fossil(repo_path): # --------------------------------------------------------------------------- -def backfill_fossils(data_dir, repo_urls): +def backfill_fossils(data_dir: str, repo_urls: dict[str, str]) -> bool: """ For every repo JSON in data_dir, recompute both fossils without touching snapshots. Always forces a fresh recompute of both genesis and survivor. @@ -412,7 +468,7 @@ def backfill_fossils(data_dir, repo_urls): # --------------------------------------------------------------------------- -def update_survivor_fossils(data_dir, repo_urls): +def update_survivor_fossils(data_dir: str, repo_urls: dict[str, str]) -> bool: # pylint: disable=too-many-locals,too-many-branches,too-many-statements """ Refresh only the Survivor (Living) fossil for each repo. @@ -533,19 +589,12 @@ def update_survivor_fossils(data_dir, repo_urls): # --------------------------------------------------------------------------- -def main(): +def main() -> None: # pylint: disable=duplicate-code """ Main entry point for fossil backfill and incremental survivor checking. """ - config_path = "theseus.config.json" - if not os.path.exists(config_path): - logger.error("Configuration file not found: %s", config_path) - sys.exit(1) - - with open(config_path, "r", encoding="utf-8") as f: - config = json.load(f) - + config = load_config() data_dir = config.get("dataDir", "./data") # Build dynamically from config: name -> github URL diff --git a/scripts/analyse_repository.py b/scripts/analyse_repository.py index 5ecb513..1c9225d 100644 --- a/scripts/analyse_repository.py +++ b/scripts/analyse_repository.py @@ -23,7 +23,7 @@ if _SCRIPTS_DIR not in sys.path: sys.path.insert(0, _SCRIPTS_DIR) -from _utils import run_command, get_default_branch +from _utils import run_command, get_default_branch, load_config logger = logging.getLogger(__name__) @@ -141,8 +141,11 @@ def analyze_snapshots(repo_path: str, commit_hash: str) -> dict[str, int]: valid_files = [f for f in files if os.path.isfile(os.path.join(repo_path, f))] - # Safe BLAME_WORKERS parsing with fallback - max_workers = min(20, (os.cpu_count() or 1) * 2) + # Safe BLAME_WORKERS parsing with fallback. + # Default caps at 8 to avoid I/O contention on HDDs (git blame is + # I/O-bound, not CPU-bound, so the CPU-count multiplier doesn't apply). + # Override via BLAME_WORKERS env var (clamped 1-100). + max_workers = min(8, (os.cpu_count() or 1) * 2) try: if "BLAME_WORKERS" in os.environ: max_workers = max(1, min(int(os.environ["BLAME_WORKERS"]), 100)) @@ -364,14 +367,7 @@ def main(): datefmt="%Y-%m-%d %H:%M:%S", ) - config_path = "theseus.config.json" - if not os.path.exists(config_path): - logger.error("Configuration file not found: %s", config_path) - sys.exit(1) - - with open(config_path, "r", encoding="utf-8") as f: - config = json.load(f) - + config = load_config() DATA_OUTPUT_DIR = config.get("dataDir", "./data") os.makedirs(DATA_OUTPUT_DIR, exist_ok=True) diff --git a/scripts/cleanup_data.py b/scripts/cleanup_data.py index 1b7369b..2180312 100644 --- a/scripts/cleanup_data.py +++ b/scripts/cleanup_data.py @@ -3,8 +3,16 @@ """ import json +import os +import sys from pathlib import Path +_SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__)) +if _SCRIPTS_DIR not in sys.path: + sys.path.insert(0, _SCRIPTS_DIR) + +from _utils import load_config + def cleanup_data(data_dir: str) -> bool: """ @@ -36,7 +44,7 @@ def cleanup_data(data_dir: str) -> bool: data = json.load(f) # Handle both list and object schemas - snapshots = data.get("snapshots", data) if isinstance(data, dict) else data + snapshots = data.get("snapshots", []) if isinstance(data, dict) else data for snapshot in snapshots: # 1. Remove redundant total_lines @@ -71,15 +79,7 @@ def cleanup_data(data_dir: str) -> bool: return had_failures def main(): - import sys - config_path = "theseus.config.json" - if not Path(config_path).exists(): - print(f"Configuration file not found: {config_path}") - sys.exit(1) - - with open(config_path, "r", encoding="utf-8") as f: - config = json.load(f) - + config = load_config() data_dir = config.get("dataDir", "./data") if cleanup_data(data_dir): print("One or more files failed to clean up. Exiting non-zero.") From 28e47921ede26b4cf1b2c30b5c4a91a49a47a015 Mon Sep 17 00:00:00 2001 From: Asif Sayyed Date: Fri, 29 May 2026 23:48:47 +0530 Subject: [PATCH 3/9] #33 fixed minor bugs --- scripts/add_fossils.py | 4 ++-- scripts/analyse_repository.py | 17 ++++++++++++----- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/scripts/add_fossils.py b/scripts/add_fossils.py index b99f866..93db760 100644 --- a/scripts/add_fossils.py +++ b/scripts/add_fossils.py @@ -34,7 +34,6 @@ from _utils import get_default_branch, run_command, load_config -logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") logger = logging.getLogger(__name__) @@ -88,7 +87,7 @@ def _blame_file(repo_path: str | Path, file_path: str, view_commit: str = "") -> fossil["line"] = line_num else: parts = line.split(" ") - if parts and len(parts[0]) in (40, 64): + if parts and len(parts[0]) in (40, 64) and all(c in "0123456789abcdef" for c in parts[0].lower()): current_commit_data = {"commit": parts[0]} elif line.startswith("author-time ") and len(parts) >= 2: try: @@ -594,6 +593,7 @@ def main() -> None: """ Main entry point for fossil backfill and incremental survivor checking. """ + logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") config = load_config() data_dir = config.get("dataDir", "./data") diff --git a/scripts/analyse_repository.py b/scripts/analyse_repository.py index 1c9225d..8df1010 100644 --- a/scripts/analyse_repository.py +++ b/scripts/analyse_repository.py @@ -96,7 +96,7 @@ def _parse_blame_output(blame_output: str) -> dict[str, int]: file_distribution[year] += 1 else: parts = line.split(" ") - if len(parts[0]) in (40, 64): + if len(parts[0]) in (40, 64) and all(c in "0123456789abcdef" for c in parts[0].lower()): current_commit = parts[0] elif parts[0] == "author-time": try: @@ -212,7 +212,9 @@ def process_repository(repo_slug: str, data_dir: str) -> None: :param data_dir: Path where the resulting JSON data will be saved. """ repo_name = repo_slug.split("/")[-1] - temp_repo_path = f"./temp_workdir_{repo_name}" + # Use the full slug (org/repo) in the temp dir name to avoid collisions + # when two different orgs have repos with the same name. + temp_repo_path = f"./temp_workdir_{repo_slug.replace('/', '__')}" output_json_path = os.path.join(data_dir, f"{repo_name}_data.json") try: @@ -326,7 +328,6 @@ def process_repository(repo_slug: str, data_dir: str) -> None: finally: if os.path.exists(temp_repo_path): logger.info("Cleaning up temporary directory: %s", temp_repo_path) - time.sleep(1) def handle_remove_readonly(func, path, _exc_info): """Handle permission errors on Windows/Unix by adding write permission.""" @@ -347,8 +348,14 @@ def handle_remove_readonly(func, path, _exc_info): break except Exception as e: # pylint: disable=broad-exception-caught if attempt < 2: - time.sleep(1) - logger.warning("Cleanup attempt %d failed: %s", attempt + 1, e) + backoff = 2 ** attempt + logger.warning( + "Cleanup attempt %d failed, retrying in %ds: %s", + attempt + 1, + backoff, + e, + ) + time.sleep(backoff) else: logger.error( "Failed to clean up temporary directory after 3 attempts: %s", From a807c516bb93440de5858e31942a03d583ad0055 Mon Sep 17 00:00:00 2001 From: Asif Sayyed Date: Sun, 31 May 2026 12:32:52 +0530 Subject: [PATCH 4/9] #33 add argparse for repo only --- scripts/_utils.py | 65 ++++++++++++++ scripts/add_fossils.py | 10 ++- scripts/analyse_repository.py | 140 +++++++++++++++++++------------ tests/test_analyse_repository.py | 44 +++++++++- 4 files changed, 202 insertions(+), 57 deletions(-) diff --git a/scripts/_utils.py b/scripts/_utils.py index 3fe3266..0dd75e7 100644 --- a/scripts/_utils.py +++ b/scripts/_utils.py @@ -10,8 +10,11 @@ import json import logging +import os +import shutil import subprocess import sys +import time logger = logging.getLogger(__name__) @@ -104,3 +107,65 @@ def get_default_branch(repo_path: str | None = None) -> str: continue return "HEAD" + + +def remove_path(path: str) -> None: + """ + Remove a file or directory using OS-native fast deletion. + + Uses ``cmd /c rd /s /q`` on Windows and ``rm -rf`` on Unix, + falling back to ``shutil.rmtree`` on failure. + + :param path: Path to the file or directory to remove. + """ + if not os.path.exists(path): + return + + if os.name == "nt": + try: + subprocess.run( + ["cmd", "/c", "rd", "/s", "/q", path], + capture_output=True, + timeout=30, + ) + if not os.path.exists(path): + return + except (subprocess.SubprocessError, OSError): + pass + else: + try: + subprocess.run( + ["rm", "-rf", path], + capture_output=True, + timeout=30, + ) + if not os.path.exists(path): + return + except (subprocess.SubprocessError, OSError): + pass + + # Fallback: retry with shutil.rmtree + for attempt in range(3): + try: + shutil.rmtree(path, ignore_errors=False) + + def handle_remove_readonly(func, path, _exc_info): + try: + current_mode = os.stat(path).st_mode + os.chmod( + path, + current_mode | stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH, + ) + func(path) + except PermissionError: + pass + except Exception: # noqa: BLE001 + pass + + shutil.rmtree(path, onexc=handle_remove_readonly) + break + except Exception: # noqa: BLE001 + if attempt < 2: + time.sleep(2 ** attempt) + else: + logger.warning("Failed to clean up %s after 3 attempts", path) diff --git a/scripts/add_fossils.py b/scripts/add_fossils.py index 93db760..06a3590 100644 --- a/scripts/add_fossils.py +++ b/scripts/add_fossils.py @@ -32,7 +32,7 @@ if _SCRIPTS_DIR not in sys.path: sys.path.insert(0, _SCRIPTS_DIR) -from _utils import get_default_branch, run_command, load_config +from _utils import get_default_branch, run_command, load_config, remove_path logger = logging.getLogger(__name__) @@ -459,6 +459,10 @@ def backfill_fossils(data_dir: str, repo_urls: dict[str, str]) -> bool: logger.error(" ✗ Error computing fossils for %s: %s", repo_name, e) had_failures = True + # Clean up temp repos + if temp_dir.exists(): + remove_path(str(temp_dir)) + return had_failures @@ -579,6 +583,10 @@ def update_survivor_fossils(data_dir: str, repo_urls: dict[str, str]) -> bool: logger.error(" ✗ Error updating survivor for %s: %s", repo_name, e) had_failures = True + # Clean up temp repos + if temp_dir.exists(): + remove_path(str(temp_dir)) + logger.info("\nSurvivor update complete. %d repo(s) updated.", updated_count) return had_failures diff --git a/scripts/analyse_repository.py b/scripts/analyse_repository.py index 8df1010..c5e0c45 100644 --- a/scripts/analyse_repository.py +++ b/scripts/analyse_repository.py @@ -6,12 +6,11 @@ Fossil computation is handled separately by add_fossils.py. """ +import argparse import concurrent.futures import json import logging import os -import shutil -import stat import sys import time from collections import defaultdict @@ -23,7 +22,7 @@ if _SCRIPTS_DIR not in sys.path: sys.path.insert(0, _SCRIPTS_DIR) -from _utils import run_command, get_default_branch, load_config +from _utils import run_command, get_default_branch, load_config, remove_path logger = logging.getLogger(__name__) @@ -197,7 +196,30 @@ def _atomic_write_json( os.replace(tmp_path, json_path) -def process_repository(repo_slug: str, data_dir: str) -> None: +def _filter_snapshots( + all_snapshots: list[tuple[str, str]], + processed_periods: set[str], + reprocess: str | None = None, +) -> list[tuple[str, str]]: + """ + Filter a list of (period, commit) snapshots down to unprocessed entries. + + When *reprocess* is provided (``YYYY-MM``), that specific period is + included regardless of whether it exists in *processed_periods*. + + :param all_snapshots: Full list of (period, commit) tuples. + :param processed_periods: Set of period strings that have already been processed. + :param reprocess: Optional period to re-run (e.g. ``"2023-06"``). + :return: List of (period, commit) tuples that need processing. + """ + result: list[tuple[str, str]] = [] + for period, commit in all_snapshots: + if period not in processed_periods or (reprocess and period == reprocess): + result.append((period, commit)) + return result + + +def process_repository(repo_slug: str, data_dir: str, reprocess: str | None = None) -> None: # pylint: disable=too-many-locals,too-many-branches,too-many-statements """ Orchestrate the extraction of Ship of Theseus code persistence data @@ -244,11 +266,7 @@ def process_repository(repo_slug: str, data_dir: str) -> None: processed_periods = set(item["snapshot_date"] for item in historical_snapshots) all_snapshots = get_snapshots(temp_repo_path) - new_snapshots = [ - (period, commit) - for period, commit in all_snapshots - if period not in processed_periods - ] + new_snapshots = _filter_snapshots(all_snapshots, processed_periods, reprocess) if not new_snapshots: logger.info( @@ -326,48 +344,37 @@ def process_repository(repo_slug: str, data_dir: str) -> None: ) finally: - if os.path.exists(temp_repo_path): - logger.info("Cleaning up temporary directory: %s", temp_repo_path) + remove_path(temp_repo_path) - def handle_remove_readonly(func, path, _exc_info): - """Handle permission errors on Windows/Unix by adding write permission.""" - try: - current_mode = os.stat(path).st_mode - os.chmod( - path, current_mode | stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH - ) - func(path) - except PermissionError as e: - logger.warning("Permission error cleaning up %s: %s", path, e) - except Exception as e: # pylint: disable=broad-exception-caught - logger.warning("Error cleaning up %s: %s", path, e) - for attempt in range(3): - try: - shutil.rmtree(temp_repo_path, onexc=handle_remove_readonly) - break - except Exception as e: # pylint: disable=broad-exception-caught - if attempt < 2: - backoff = 2 ** attempt - logger.warning( - "Cleanup attempt %d failed, retrying in %ds: %s", - attempt + 1, - backoff, - e, - ) - time.sleep(backoff) - else: - logger.error( - "Failed to clean up temporary directory after 3 attempts: %s", - e, - ) - - -def main(): +def main() -> None: """ Main entry point. Loads configuration, creates output directory, and runs the repository analysis pipeline for all specified targets. + + CLI flags + --------- + --repo NAME Process only the given repository (by config name). + --reprocess YYYY-MM + Re-process a specific snapshot period even if it already exists in the data. """ + parser = argparse.ArgumentParser( + description="Analyse repository git history for the Ship of Theseus pipeline." + ) + parser.add_argument( + "--repo", + metavar="NAME", + default=None, + help="Only process this repository (e.g. 'react'). If omitted, all repos are processed.", + ) + parser.add_argument( + "--reprocess", + metavar="YYYY-MM", + default=None, + help="Re-process a specific snapshot period (e.g. '2023-06').", + ) + args = parser.parse_args() + logging.basicConfig( level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s", @@ -378,34 +385,57 @@ def main(): DATA_OUTPUT_DIR = config.get("dataDir", "./data") os.makedirs(DATA_OUTPUT_DIR, exist_ok=True) - TARGETS = [ - repo["repo"] for repo in config.get("repositories", []) if "repo" in repo - ] - if not TARGETS: + # Build from config: name -> repo slug + all_targets: dict[str, str] = { + repo["name"]: repo["repo"] + for repo in config.get("repositories", []) + if "name" in repo and "repo" in repo + } + if not all_targets: logger.error("No valid repositories found in configuration.") sys.exit(1) + if args.repo: + if args.repo not in all_targets: + logger.error( + "Unknown repository '%s'. Valid options: %s", + args.repo, + ", ".join(all_targets), + ) + sys.exit(1) + selected_targets = {args.repo: all_targets[args.repo]} + logger.info("Processing single repository: %s", args.repo) + else: + selected_targets = all_targets + logger.info("Processing %d repositories", len(selected_targets)) + + if args.reprocess: + logger.info("Re-processing period: %s", args.reprocess) + # Bound top-level workers by CPU count max_top_level_workers = min( - len(TARGETS), int(os.getenv("MAX_TOP_LEVEL_WORKERS", os.cpu_count() or 1)) + len(selected_targets), + int(os.getenv("MAX_TOP_LEVEL_WORKERS", os.cpu_count() or 1)), ) overall_start = time.perf_counter() - logger.info("Starting analysis pipeline for %d repositories", len(TARGETS)) with concurrent.futures.ThreadPoolExecutor( max_workers=max_top_level_workers ) as executor: futures = { - executor.submit(process_repository, target, DATA_OUTPUT_DIR): target - for target in TARGETS + executor.submit( + process_repository, slug, DATA_OUTPUT_DIR, args.reprocess + ): name + for name, slug in selected_targets.items() } for future in concurrent.futures.as_completed(futures): - target = futures[future] + name = futures[future] try: future.result() + logger.info("✓ %s completed successfully.", name) except Exception as e: # pylint: disable=broad-exception-caught - logger.error("Failed to process %s: %s", target, e) + logger.error("Failed to process %s: %s", name, e) overall_elapsed = time.perf_counter() - overall_start logger.info("TOTAL PIPELINE EXECUTION TIME: %.2f seconds", overall_elapsed) diff --git a/tests/test_analyse_repository.py b/tests/test_analyse_repository.py index efcc66a..3882cfa 100644 --- a/tests/test_analyse_repository.py +++ b/tests/test_analyse_repository.py @@ -10,7 +10,11 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) # pylint: disable=wrong-import-position,import-error -from scripts.analyse_repository import _parse_blame_output, load_existing_state +from scripts.analyse_repository import ( + _filter_snapshots, + _parse_blame_output, + load_existing_state, +) class TestParseBlameOutput: @@ -141,3 +145,41 @@ def test_corrupted_json_returns_empty(self): assert result == {"snapshots": [], "fossils": {}} os.unlink(f.name) + + +class TestFilterSnapshots: + """Tests for the snapshot filtering helper.""" + + def test_filters_out_processed_periods(self): + """Test that processed periods are excluded from the result.""" + all_snaps = [("2020-01", "a"), ("2020-02", "b"), ("2020-03", "c")] + processed = {"2020-01", "2020-03"} + result = _filter_snapshots(all_snaps, processed) + assert result == [("2020-02", "b")] + + def test_returns_all_when_none_processed(self): + """Test that when no periods have been processed, all snapshots are returned.""" + all_snaps = [("2020-01", "a"), ("2020-02", "b")] + result = _filter_snapshots(all_snaps, set()) + assert result == all_snaps + + def test_empty_input(self): + """Test that an empty snapshot list returns an empty list.""" + result = _filter_snapshots([], set()) + assert result == [] + + def test_reprocess_includes_specific_period(self): + """Test that a reprocess period is included even if it was already processed.""" + all_snaps = [("2020-01", "a"), ("2020-02", "b"), ("2020-03", "c")] + processed = {"2020-01", "2020-03"} + result = _filter_snapshots(all_snaps, processed, reprocess="2020-01") + assert ("2020-01", "a") in result + assert ("2020-02", "b") in result + assert ("2020-03", "c") not in result + + def test_reprocess_with_unprocessed_period(self): + """Test that reprocessing an unprocessed period just includes it normally.""" + all_snaps = [("2020-01", "a"), ("2020-02", "b")] + processed = {"2020-02"} + result = _filter_snapshots(all_snaps, processed, reprocess="2020-01") + assert result == [("2020-01", "a")] From 1384e165da6f331f1e5830daa2a4b0f616f9210d Mon Sep 17 00:00:00 2001 From: Asif Sayyed Date: Sun, 31 May 2026 15:54:02 +0530 Subject: [PATCH 5/9] @33 updated the fetched data --- data/claude-code_data.json | 1 + data/langchain_data.json | 1 + data/numpy_data.json | 1 + data/react_data.json | 1 + data/zed_data.json | 1 + scripts/add_fossils.py | 13 ++++++++--- scripts/cleanup_data.py | 4 +++- theseus.config.json | 44 ++------------------------------------ 8 files changed, 20 insertions(+), 46 deletions(-) create mode 100644 data/claude-code_data.json create mode 100644 data/langchain_data.json create mode 100644 data/numpy_data.json create mode 100644 data/react_data.json create mode 100644 data/zed_data.json diff --git a/data/claude-code_data.json b/data/claude-code_data.json new file mode 100644 index 0000000..b264712 --- /dev/null +++ b/data/claude-code_data.json @@ -0,0 +1 @@ +{"snapshots":[{"snapshot_date":"2025-02","composition":{"2025":315}},{"snapshot_date":"2025-03","composition":{"2025":368}},{"snapshot_date":"2025-04","composition":{"2025":752}},{"snapshot_date":"2025-05","composition":{"2025":847}},{"snapshot_date":"2025-06","composition":{"2025":77617}},{"snapshot_date":"2025-07","composition":{"2025":77977}},{"snapshot_date":"2025-08","composition":{"2025":78851}},{"snapshot_date":"2025-09","composition":{"2025":56419}},{"snapshot_date":"2025-10","composition":{"2025":60375}},{"snapshot_date":"2025-11","composition":{"2025":86353}},{"snapshot_date":"2025-12","composition":{"2025":86746}},{"snapshot_date":"2026-01","composition":{"2025":86493,"2026":716}},{"snapshot_date":"2026-02","composition":{"2025":86216,"2026":1694}},{"snapshot_date":"2026-03","composition":{"2026":2376,"2025":86216}},{"snapshot_date":"2026-04","composition":{"2025":86210,"2026":3326}},{"snapshot_date":"2026-05","composition":{"2025":85997,"2026":11497}}],"fossils":{"genesis":{"timestamp":1740421074,"file":"SECURITY.md","content":"# Security Policy","year":"2025","commit":"429d03e","view_commit":"429d03e3823d7dfeaf162321f033f2b667c9b554","line":1},"survivor":{"timestamp":1740245022,"file":".devcontainer/init-firewall.sh","content":"#!/bin/bash","year":"2025","commit":"bd5ca70","view_commit":"main","line":1}}} \ No newline at end of file diff --git a/data/langchain_data.json b/data/langchain_data.json new file mode 100644 index 0000000..8f02778 --- /dev/null +++ b/data/langchain_data.json @@ -0,0 +1 @@ +{"snapshots":[{"snapshot_date":"2022-12","composition":{"2022":33781}},{"snapshot_date":"2023-03","composition":{"2022":23173,"2023":99857}},{"snapshot_date":"2023-06","composition":{"2023":673974,"2022":18477,"2026":4}},{"snapshot_date":"2023-09","composition":{"2023":737974,"2022":15979,"2026":4}},{"snapshot_date":"2023-12","composition":{"2023":1060355,"2022":11100,"2026":8}},{"snapshot_date":"2024-03","composition":{"2023":889068,"2022":10561,"2024":491964,"2026":8}},{"snapshot_date":"2024-06","composition":{"2024":566702,"2023":709506,"2022":7565,"2026":8}},{"snapshot_date":"2024-09","composition":{"2024":658281,"2023":662951,"2022":7142,"2026":8}},{"snapshot_date":"2024-12","composition":{"2024":720724,"2023":544255,"2022":7122,"2026":8}},{"snapshot_date":"2025-01","composition":{"2023":542769,"2024":707768,"2022":7106,"2025":56667,"2026":8}},{"snapshot_date":"2025-02","composition":{"2024":657110,"2023":533502,"2022":6529,"2025":124263,"2026":8}},{"snapshot_date":"2025-03","composition":{"2025":156895,"2023":529492,"2024":639961,"2022":6506,"2026":8}},{"snapshot_date":"2025-04","composition":{"2025":144495,"2023":315832,"2024":448736,"2022":5672,"2026":4}},{"snapshot_date":"2025-05","composition":{"2025":158158,"2023":313824,"2024":444368,"2022":5671,"2026":4}},{"snapshot_date":"2025-06","composition":{"2023":313268,"2024":441875,"2022":5671,"2025":170299,"2026":4}},{"snapshot_date":"2025-07","composition":{"2023":309042,"2025":203651,"2024":435352,"2022":5429,"2026":4}},{"snapshot_date":"2025-08","composition":{"2025":222636,"2023":308685,"2024":433568,"2022":5427,"2026":4}},{"snapshot_date":"2025-09","composition":{"2023":306919,"2025":253228,"2024":429514,"2022":5409,"2026":4}},{"snapshot_date":"2025-10","composition":{"2025":213219,"2023":83609,"2024":138310,"2022":3632,"2026":2}},{"snapshot_date":"2025-11","composition":{"2025":213381,"2023":83271,"2024":137738,"2022":3489,"2026":2}},{"snapshot_date":"2025-12","composition":{"2025":221811,"2023":83009,"2024":136936,"2022":3485,"2026":2}},{"snapshot_date":"2026-01","composition":{"2025":207615,"2026":12524,"2023":81152,"2024":128469,"2022":3485}},{"snapshot_date":"2026-02","composition":{"2025":198810,"2023":81133,"2026":35849,"2024":126788,"2022":3484}},{"snapshot_date":"2026-03","composition":{"2025":195107,"2023":81091,"2024":126636,"2026":47777,"2022":3475}},{"snapshot_date":"2026-04","composition":{"2025":194170,"2026":71165,"2023":81047,"2024":126480,"2022":3475}},{"snapshot_date":"2026-05","composition":{"2025":192543,"2026":86935,"2023":81024,"2024":125964,"2022":3475}}],"fossils":{"genesis":{"timestamp":1666666586,"file":"examples/natbot.py","content":"\"\"\"Run NatBot.\"\"\"","year":"2022","commit":"1ef3ab4","view_commit":"1ef3ab4d0e663be147a5bcf542045e1f4a065778","line":1},"survivor":{"timestamp":1666648275,"file":".github/workflows/_lint.yml","content":"jobs:","year":"2022","commit":"18aeb72","view_commit":"master","line":33}}} \ No newline at end of file diff --git a/data/numpy_data.json b/data/numpy_data.json new file mode 100644 index 0000000..3e60189 --- /dev/null +++ b/data/numpy_data.json @@ -0,0 +1 @@ +{"snapshots":[{"snapshot_date":"2001-12","composition":{"2001":1865}},{"snapshot_date":"2002-03","composition":{"2002":94339,"2001":1472}},{"snapshot_date":"2002-06","composition":{"2002":102869,"2001":1179}},{"snapshot_date":"2002-09","composition":{"2002":130360,"2001":1167}},{"snapshot_date":"2002-12","composition":{"2002":132966,"2001":1130}},{"snapshot_date":"2003-03","composition":{"2002":132607,"2003":2305,"2001":1052}},{"snapshot_date":"2003-06","composition":{"2003":2688,"2002":132569,"2001":1047}},{"snapshot_date":"2003-09","composition":{"2003":3793,"2002":132461,"2001":1036}},{"snapshot_date":"2003-12","composition":{"2002":131017,"2003":5328,"2001":1009}},{"snapshot_date":"2004-03","composition":{"2002":129547,"2003":3964,"2004":5960,"2001":449}},{"snapshot_date":"2004-06","composition":{"2004":9689,"2002":129500,"2003":3916,"2001":443}},{"snapshot_date":"2004-09","composition":{"2004":10562,"2002":128923,"2003":3868,"2001":443}},{"snapshot_date":"2004-12","composition":{"2002":128551,"2003":3680,"2004":13272,"2001":437}},{"snapshot_date":"2005-03","composition":{"2002":128546,"2003":3676,"2004":13125,"2005":352,"2001":437}},{"snapshot_date":"2005-06","composition":{"2004":13106,"2005":1801,"2002":128527,"2003":3655,"2001":437}},{"snapshot_date":"2005-09","composition":{"2005":150609,"2002":120948,"2004":2743,"2003":2178}},{"snapshot_date":"2005-12","composition":{"2005":192483,"2002":95435,"2004":2503,"2003":1868}},{"snapshot_date":"2006-03","composition":{"2006":24364,"2005":147165,"2002":1852,"2004":152,"2003":357}},{"snapshot_date":"2006-06","composition":{"2006":42885,"2005":144356,"2002":1837,"2004":148,"2003":355}},{"snapshot_date":"2006-09","composition":{"2005":135193,"2006":106620,"2002":1809,"2004":145,"2003":339}},{"snapshot_date":"2006-12","composition":{"2006":82177,"2005":134843,"2002":1803,"2004":144,"2003":339}},{"snapshot_date":"2007-03","composition":{"2005":134590,"2006":76164,"2007":13620,"2002":1749,"2004":144,"2003":333}},{"snapshot_date":"2007-06","composition":{"2005":133132,"2006":74427,"2007":21742,"2002":1747,"2004":144,"2003":333}},{"snapshot_date":"2007-09","composition":{"2006":61136,"2005":124879,"2007":49906,"2002":1742,"2004":144,"2003":333}},{"snapshot_date":"2007-12","composition":{"2006":60016,"2007":58304,"2005":123821,"2002":1739,"2004":144,"2003":332}},{"snapshot_date":"2008-03","composition":{"2005":122014,"2007":61831,"2006":58723,"2008":11648,"2002":1736,"2004":144,"2003":328}},{"snapshot_date":"2008-06","composition":{"2005":120599,"2006":52922,"2008":35857,"2007":54121,"2002":1602,"2004":141,"2003":316}},{"snapshot_date":"2008-09","composition":{"2005":119750,"2006":33511,"2008":136007,"2007":46987,"2002":1353,"2004":138,"2003":281}},{"snapshot_date":"2008-12","composition":{"2005":118888,"2006":33032,"2007":45694,"2008":162098,"2002":1352,"2004":138,"2003":281}},{"snapshot_date":"2009-03","composition":{"2005":118010,"2006":31300,"2007":40897,"2008":155221,"2009":45419,"2002":1351,"2004":138,"2003":278}},{"snapshot_date":"2009-06","composition":{"2005":116213,"2006":26011,"2008":148517,"2009":87758,"2007":30084,"2002":1346,"2004":138,"2003":278}},{"snapshot_date":"2009-09","composition":{"2005":116065,"2006":25778,"2007":29821,"2008":146944,"2009":100827,"2002":1158,"2004":136,"2003":278}},{"snapshot_date":"2009-12","composition":{"2008":143661,"2009":145005,"2006":25011,"2005":113855,"2007":28736,"2002":1158,"2004":136,"2003":273}},{"snapshot_date":"2010-03","composition":{"2009":136932,"2008":142180,"2005":111850,"2006":24474,"2007":28449,"2010":17329,"2002":1064,"2004":123,"2003":259}},{"snapshot_date":"2010-06","composition":{"2005":111719,"2006":23646,"2007":28023,"2008":141950,"2009":136184,"2010":23093,"2002":1057,"2004":123,"2003":259}},{"snapshot_date":"2010-09","composition":{"2010":28562,"2009":135219,"2006":23589,"2005":111669,"2008":141483,"2007":25037,"2002":1055,"2004":123,"2003":259}},{"snapshot_date":"2010-12","composition":{"2010":43152,"2009":125155,"2006":23469,"2005":111642,"2008":140070,"2007":24957,"2002":1046,"2004":123,"2003":259}},{"snapshot_date":"2011-03","composition":{"2010":49445,"2005":111319,"2006":23132,"2007":23654,"2011":38997,"2009":119000,"2008":138780,"2002":1039,"2004":123,"2003":257}},{"snapshot_date":"2011-06","composition":{"2010":48692,"2011":58195,"2008":138338,"2009":115195,"2006":22996,"2005":111133,"2007":23524,"2002":995,"2004":122,"2003":257}},{"snapshot_date":"2011-09","composition":{"2011":88031,"2010":46487,"2008":137097,"2009":112421,"2006":22699,"2005":110990,"2007":22880,"2002":995,"2004":121,"2003":257}},{"snapshot_date":"2011-12","composition":{"2010":46148,"2011":92965,"2005":110983,"2006":22541,"2007":22841,"2008":137041,"2009":110759,"2002":995,"2004":121,"2003":257}},{"snapshot_date":"2012-03","composition":{"2010":42510,"2011":95632,"2006":22193,"2005":110401,"2007":22505,"2009":108210,"2008":136391,"2012":12933,"2002":995,"2004":121,"2003":257}},{"snapshot_date":"2012-06","composition":{"2010":41767,"2012":20487,"2011":82718,"2006":21976,"2009":107301,"2005":110335,"2007":22419,"2008":136224,"2002":990,"2004":121,"2003":255}},{"snapshot_date":"2012-09","composition":{"2010":41149,"2012":30421,"2011":79589,"2005":110282,"2006":21736,"2007":22074,"2009":105328,"2008":135866,"2002":990,"2004":121,"2003":255}},{"snapshot_date":"2012-12","composition":{"2010":40909,"2012":32983,"2011":79086,"2005":110248,"2006":21734,"2007":22062,"2008":135824,"2009":104923,"2002":990,"2004":121,"2003":255}},{"snapshot_date":"2013-03","composition":{"2012":29693,"2010":40794,"2011":78031,"2013":7110,"2005":109997,"2006":21580,"2007":21690,"2008":134251,"2009":103977,"2002":989,"2004":121,"2003":254}},{"snapshot_date":"2013-06","composition":{"2012":183454,"2010":39837,"2011":77317,"2013":24040,"2005":35090,"2006":21080,"2007":21351,"2009":101933,"2008":133562,"2002":987,"2004":120,"2003":253}},{"snapshot_date":"2013-09","composition":{"2012":181910,"2013":44136,"2010":37648,"2011":75123,"2006":14587,"2009":97168,"2005":33008,"2007":20078,"2008":127205,"2002":711,"2004":78,"2003":252}},{"snapshot_date":"2013-12","composition":{"2013":46300,"2012":180411,"2010":37466,"2011":74810,"2006":14451,"2009":96747,"2008":126806,"2005":32818,"2007":19912,"2002":709,"2004":77,"2003":252}},{"snapshot_date":"2014-03","composition":{"2010":36748,"2013":43606,"2012":178530,"2011":73040,"2014":18240,"2006":13029,"2009":94898,"2005":30273,"2007":19769,"2008":45858,"2002":696,"2004":77,"2003":252}},{"snapshot_date":"2014-06","composition":{"2010":36188,"2012":171754,"2014":13714,"2013":41708,"2011":70672,"2005":30161,"2006":12022,"2007":18289,"2009":87455,"2008":45113,"2002":695,"2004":77,"2003":252}},{"snapshot_date":"2014-09","composition":{"2012":171112,"2014":22362,"2013":40712,"2010":35603,"2011":69888,"2005":29302,"2006":11875,"2007":18123,"2009":86038,"2008":44758,"2002":635,"2004":60,"2003":252}},{"snapshot_date":"2014-12","composition":{"2010":35489,"2014":25570,"2013":40593,"2012":170931,"2011":69766,"2006":11771,"2009":85664,"2005":29175,"2007":17939,"2008":44138,"2002":634,"2004":60,"2003":252}},{"snapshot_date":"2015-03","composition":{"2013":39957,"2010":35074,"2015":6679,"2012":170636,"2014":26701,"2011":69486,"2008":43679,"2009":84818,"2005":29095,"2006":11512,"2007":17877,"2002":634,"2004":60,"2003":252}},{"snapshot_date":"2015-06","composition":{"2015":12670,"2012":170329,"2010":34951,"2014":27726,"2013":39699,"2011":68961,"2005":29020,"2006":11386,"2007":17747,"2008":43249,"2009":84286,"2002":631,"2004":60,"2003":252}},{"snapshot_date":"2015-09","composition":{"2015":29669,"2010":34469,"2014":27200,"2012":169803,"2013":37171,"2011":67345,"2006":10984,"2009":83646,"2005":26685,"2007":16829,"2008":41734,"2002":630,"2004":56,"2003":234}},{"snapshot_date":"2015-12","composition":{"2010":34274,"2014":26998,"2013":37010,"2015":35389,"2012":169642,"2011":66650,"2008":41411,"2009":83086,"2006":10946,"2005":26566,"2007":16737,"2002":629,"2004":56,"2003":234,"2016":1}},{"snapshot_date":"2016-03","composition":{"2012":169383,"2015":36532,"2016":7776,"2010":34125,"2014":26521,"2013":36586,"2006":10683,"2008":41062,"2009":82603,"2011":65855,"2005":26355,"2007":16480,"2002":629,"2004":56,"2003":234}},{"snapshot_date":"2016-06","composition":{"2013":36522,"2010":34002,"2015":36608,"2012":169365,"2014":26371,"2011":65773,"2016":10759,"2008":40860,"2009":82403,"2006":10650,"2005":26319,"2007":16317,"2002":623,"2004":56,"2003":234}},{"snapshot_date":"2016-09","composition":{"2012":169253,"2015":36237,"2016":15735,"2010":33913,"2014":26125,"2013":36265,"2008":40740,"2009":82284,"2011":65666,"2006":10609,"2005":26245,"2007":16285,"2002":623,"2004":56,"2003":234}},{"snapshot_date":"2016-12","composition":{"2015":36071,"2012":169155,"2016":21595,"2014":25757,"2013":35985,"2010":33535,"2011":65332,"2008":40607,"2009":82163,"2005":26216,"2007":16223,"2006":10541,"2002":622,"2004":56,"2003":233}},{"snapshot_date":"2017-03","composition":{"2014":25576,"2017":110922,"2015":35648,"2013":35176,"2012":62863,"2016":39453,"2010":33095,"2011":65019,"2008":39631,"2009":68413,"2006":10382,"2005":25919,"2007":16089,"2002":570,"2004":51,"2003":233}},{"snapshot_date":"2017-06","composition":{"2014":24843,"2017":120751,"2015":35392,"2012":62775,"2016":38979,"2013":33867,"2010":32885,"2011":64813,"2008":39305,"2009":66773,"2006":10225,"2005":17793,"2007":15934,"2002":563,"2004":49,"2003":233}},{"snapshot_date":"2017-09","composition":{"2014":24395,"2017":133110,"2012":62537,"2015":34827,"2016":38009,"2013":33024,"2010":32136,"2011":63614,"2006":10061,"2008":38254,"2009":65072,"2005":17186,"2007":15636,"2002":559,"2004":49,"2003":233}},{"snapshot_date":"2017-12","composition":{"2017":149940,"2015":34262,"2016":37400,"2012":62341,"2013":32565,"2014":24265,"2010":31890,"2011":63048,"2008":37707,"2009":64466,"2006":9933,"2005":17051,"2007":15468,"2002":559,"2004":49,"2003":233}},{"snapshot_date":"2018-03","composition":{"2014":22677,"2017":150500,"2015":33944,"2016":36999,"2013":32357,"2012":61939,"2018":5510,"2010":31760,"2011":62549,"2006":9853,"2008":37535,"2009":64198,"2005":16919,"2007":15411,"2002":559,"2004":49,"2003":233}},{"snapshot_date":"2018-06","composition":{"2018":19708,"2013":31673,"2017":150670,"2015":33690,"2016":36365,"2010":31401,"2012":61569,"2014":22274,"2011":61256,"2008":36911,"2009":63381,"2006":9774,"2005":16840,"2007":15269,"2002":550,"2004":49,"2003":232}},{"snapshot_date":"2018-09","composition":{"2017":150375,"2015":33569,"2018":30527,"2016":36174,"2014":21992,"2013":31339,"2010":31239,"2012":61426,"2011":60402,"2008":36478,"2009":62752,"2005":16558,"2007":15145,"2006":9397,"2002":543,"2004":49,"2003":232}},{"snapshot_date":"2018-12","composition":{"2018":48265,"2017":149585,"2015":32778,"2016":35741,"2014":21809,"2010":31055,"2012":61190,"2013":31020,"2011":59340,"2005":14910,"2009":60872,"2006":9063,"2008":35767,"2007":15047,"2002":539,"2004":49,"2003":232}},{"snapshot_date":"2019-03","composition":{"2018":49111,"2019":9272,"2015":32514,"2017":149531,"2016":35564,"2013":30072,"2014":21280,"2012":60851,"2008":35532,"2009":60367,"2010":30642,"2011":58341,"2006":9016,"2005":14835,"2007":15001,"2002":539,"2004":49,"2003":232}},{"snapshot_date":"2019-06","composition":{"2017":149695,"2015":32943,"2018":41877,"2016":36017,"2014":21938,"2013":31076,"2010":31147,"2012":61227,"2011":59492,"2008":36237,"2009":61872,"2006":9065,"2005":16430,"2007":15084,"2002":539,"2004":49,"2019":64,"2003":232}},{"snapshot_date":"2019-09","composition":{"2018":66238,"2019":47674,"2017":149155,"2015":31132,"2016":34764,"2014":20457,"2013":29628,"2010":30023,"2012":60316,"2011":57507,"2008":33249,"2009":57563,"2006":8623,"2005":12783,"2007":14521,"2002":538,"2004":49,"2003":231}},{"snapshot_date":"2019-12","composition":{"2018":65394,"2019":56820,"2014":19795,"2017":148850,"2015":30831,"2010":29927,"2016":34561,"2012":60140,"2013":29363,"2011":57286,"2008":32874,"2009":57304,"2006":8557,"2005":12717,"2007":14497,"2002":537,"2004":49,"2003":231}},{"snapshot_date":"2020-03","composition":{"2018":64016,"2019":57180,"2020":24182,"2015":30240,"2014":19448,"2017":147161,"2013":28115,"2012":59428,"2016":33939,"2010":27781,"2011":55546,"2008":32306,"2009":55012,"2006":8293,"2005":12409,"2007":14306,"2002":531,"2004":49,"2003":230}},{"snapshot_date":"2020-06","composition":{"2018":63130,"2019":56788,"2020":48902,"2015":30033,"2014":19034,"2017":147034,"2010":27613,"2016":33750,"2012":59098,"2013":27979,"2011":54685,"2008":32160,"2009":54186,"2006":8235,"2005":12251,"2007":14291,"2002":531,"2004":49,"2003":230}},{"snapshot_date":"2020-09","composition":{"2018":62241,"2019":55422,"2020":77584,"2014":18764,"2017":145552,"2015":29338,"2010":19266,"2016":33581,"2012":58501,"2013":27478,"2011":51167,"2005":12176,"2009":47014,"2008":30427,"2006":8067,"2007":14242,"2002":527,"2004":49,"2003":230}},{"snapshot_date":"2020-12","composition":{"2018":61019,"2020":114533,"2019":54317,"2014":18594,"2017":145129,"2015":28817,"2012":58445,"2016":33480,"2013":26876,"2010":19070,"2011":50577,"2008":30008,"2009":46735,"2006":8047,"2005":12142,"2007":14182,"2002":526,"2004":49,"2003":230}},{"snapshot_date":"2021-03","composition":{"2018":61349,"2020":104740,"2019":54845,"2014":18621,"2017":145434,"2015":29101,"2010":19104,"2016":33526,"2012":58457,"2013":27231,"2011":50614,"2008":30056,"2009":46855,"2006":8051,"2005":12145,"2007":14189,"2021":2222,"2002":527,"2004":49,"2003":230}},{"snapshot_date":"2021-06","composition":{"2020":112039,"2021":45060,"2015":28456,"2019":53018,"2018":59930,"2010":18935,"2016":33218,"2012":58314,"2017":144293,"2014":18363,"2013":26466,"2011":47525,"2006":8013,"2008":30260,"2009":46200,"2005":12104,"2007":14141,"2002":524,"2004":49,"2003":230}},{"snapshot_date":"2021-09","composition":{"2021":97407,"2018":59052,"2020":109278,"2019":52650,"2015":27816,"2012":58200,"2016":33061,"2017":143713,"2013":25955,"2010":18656,"2014":17505,"2011":46683,"2008":29288,"2009":45809,"2006":7931,"2005":11659,"2007":13859,"2002":519,"2004":48,"2003":230}},{"snapshot_date":"2021-12","composition":{"2018":58489,"2021":121839,"2020":106659,"2019":52144,"2015":27360,"2010":17939,"2016":32977,"2012":57388,"2017":143312,"2014":16983,"2013":25758,"2011":46229,"2008":29088,"2009":45709,"2006":7907,"2005":11544,"2007":13832,"2022":617,"2002":519,"2004":48,"2003":230}},{"snapshot_date":"2022-03","composition":{"2018":58156,"2019":51279,"2021":125507,"2020":105377,"2015":26951,"2022":17766,"2010":17808,"2016":32844,"2012":56311,"2017":143069,"2014":16635,"2013":25031,"2011":46017,"2006":7895,"2008":28815,"2009":45556,"2005":11537,"2007":13822,"2002":519,"2004":48,"2003":230}},{"snapshot_date":"2022-06","composition":{"2018":57807,"2021":126201,"2019":50945,"2015":26535,"2020":104275,"2022":37666,"2010":17599,"2016":32719,"2012":55823,"2017":142586,"2014":16534,"2013":24376,"2011":45697,"2006":7797,"2008":28642,"2009":44994,"2005":11438,"2007":13733,"2002":516,"2004":48,"2003":229}},{"snapshot_date":"2022-09","composition":{"2015":26468,"2021":125733,"2019":50800,"2020":104008,"2018":57726,"2022":42875,"2010":17534,"2016":32655,"2012":55811,"2017":142510,"2014":16422,"2013":24318,"2011":45687,"2008":28526,"2009":44517,"2006":7794,"2005":11438,"2007":13668,"2002":516,"2004":48,"2003":229}},{"snapshot_date":"2022-12","composition":{"2018":57356,"2021":124792,"2020":103301,"2022":55956,"2019":50605,"2015":25906,"2010":17490,"2016":32562,"2012":55746,"2017":142114,"2014":16365,"2013":23784,"2011":45494,"2008":28450,"2009":44247,"2006":7761,"2005":11405,"2007":13577,"2003":229,"2004":48,"2002":516}},{"snapshot_date":"2023-03","composition":{"2022":59169,"2023":10359,"2021":120525,"2018":56770,"2019":49742,"2020":100987,"2015":25710,"2012":55648,"2016":32315,"2013":23482,"2017":140354,"2010":17341,"2014":16282,"2011":45145,"2008":28012,"2009":44088,"2006":7683,"2005":11389,"2007":13558,"2002":516,"2004":48,"2003":228}},{"snapshot_date":"2023-06","composition":{"2022":58438,"2023":34208,"2018":56373,"2021":107985,"2019":49163,"2020":100184,"2015":25495,"2012":55502,"2016":32250,"2017":139945,"2010":16797,"2014":16125,"2013":23318,"2011":44806,"2006":7209,"2008":27784,"2009":43728,"2005":11334,"2007":13493,"2002":516,"2004":48,"2003":227}},{"snapshot_date":"2023-09","composition":{"2022":55701,"2023":62492,"2018":55435,"2021":100402,"2019":47753,"2020":94592,"2015":24632,"2012":55265,"2016":30433,"2017":138546,"2010":16400,"2014":15672,"2013":22552,"2011":44120,"2008":26921,"2009":41512,"2006":6928,"2005":11158,"2007":13056,"2004":48,"2003":220,"2002":509}},{"snapshot_date":"2023-12","composition":{"2018":53945,"2021":98880,"2022":54600,"2023":91904,"2019":47067,"2020":93527,"2015":24156,"2012":54822,"2016":30177,"2017":137828,"2010":15736,"2014":15179,"2013":21937,"2011":43821,"2008":26233,"2009":40323,"2006":6464,"2005":10910,"2007":12624,"2004":31,"2003":217,"2002":486}},{"snapshot_date":"2024-03","composition":{"2018":50761,"2021":93663,"2022":52840,"2023":83911,"2024":30118,"2020":92796,"2019":46690,"2015":24044,"2012":54601,"2016":30014,"2017":137644,"2010":15661,"2014":15119,"2013":21678,"2011":43446,"2008":26095,"2009":39081,"2006":6389,"2005":10883,"2007":12600,"2004":31,"2003":217,"2002":485}},{"snapshot_date":"2024-06","composition":{"2018":50598,"2022":52335,"2023":82879,"2021":93119,"2019":46371,"2024":39429,"2020":92289,"2015":23986,"2012":54472,"2016":29981,"2017":137433,"2010":15623,"2014":15091,"2013":21640,"2011":43348,"2006":6379,"2008":26022,"2005":10869,"2009":39036,"2007":12593,"2004":31,"2003":217,"2002":485}},{"snapshot_date":"2024-09","composition":{"2022":51246,"2023":81060,"2018":49868,"2021":91300,"2019":45868,"2024":58495,"2020":91665,"2015":23851,"2012":54407,"2016":29872,"2017":137201,"2008":25874,"2009":38030,"2014":14959,"2010":15329,"2013":21558,"2011":42777,"2006":6362,"2005":10829,"2007":12568,"2004":31,"2003":217,"2002":485}},{"snapshot_date":"2024-12","composition":{"2022":50652,"2023":79580,"2021":89442,"2018":49399,"2024":79718,"2020":90816,"2019":45459,"2015":23478,"2010":15070,"2016":29631,"2012":54260,"2017":136825,"2014":14558,"2013":20957,"2011":40920,"2008":25636,"2009":37605,"2006":6288,"2005":10787,"2007":12206,"2004":23,"2003":211,"2002":469}},{"snapshot_date":"2025-01","composition":{"2022":50616,"2023":79451,"2018":49378,"2019":45451,"2021":89244,"2024":80020,"2020":90760,"2025":2383,"2015":23478,"2010":15065,"2016":29621,"2012":54234,"2017":136711,"2014":14557,"2013":20932,"2011":40903,"2008":25635,"2009":37562,"2006":6288,"2005":10787,"2007":12206,"2004":23,"2003":211,"2002":469}},{"snapshot_date":"2025-02","composition":{"2021":89104,"2025":6210,"2022":50497,"2023":79170,"2018":49255,"2024":79734,"2020":90255,"2019":45295,"2015":23474,"2010":15058,"2016":29571,"2012":54148,"2017":136630,"2014":14553,"2013":20915,"2011":40815,"2006":6278,"2008":25633,"2009":37544,"2005":10787,"2007":12204,"2004":23,"2003":211,"2002":469}},{"snapshot_date":"2025-03","composition":{"2023":78817,"2018":49216,"2024":78977,"2022":49805,"2020":90085,"2021":88042,"2025":12625,"2019":45287,"2015":23424,"2010":15051,"2016":29570,"2012":54148,"2017":136627,"2014":14545,"2013":20864,"2011":40815,"2008":25623,"2009":37543,"2006":6278,"2005":10787,"2007":11959,"2004":23,"2003":211,"2002":469}},{"snapshot_date":"2025-04","composition":{"2018":49092,"2021":86817,"2022":49392,"2023":77780,"2024":77274,"2020":89805,"2025":19503,"2019":45201,"2015":23102,"2010":15012,"2016":29497,"2012":54125,"2017":136524,"2014":14525,"2013":20769,"2011":40682,"2008":25591,"2009":37512,"2006":6270,"2005":10762,"2007":11951,"2004":23,"2003":211,"2002":464}},{"snapshot_date":"2025-05","composition":{"2022":49090,"2023":77083,"2021":85761,"2025":27815,"2018":48737,"2024":76096,"2020":89333,"2019":45009,"2015":22923,"2010":14979,"2016":29405,"2012":54096,"2017":136356,"2014":14458,"2013":20628,"2011":40637,"2008":25546,"2009":37291,"2006":6254,"2005":10703,"2007":11866,"2004":22,"2003":211,"2002":450}},{"snapshot_date":"2025-06","composition":{"2022":49034,"2023":76925,"2018":48697,"2024":75818,"2020":89213,"2021":85570,"2025":32977,"2019":44978,"2015":22919,"2010":14972,"2016":29396,"2012":54070,"2017":136354,"2014":14454,"2013":20607,"2011":40621,"2005":10703,"2008":25546,"2007":11865,"2006":6254,"2009":37289,"2004":22,"2003":211,"2002":450}},{"snapshot_date":"2025-07","composition":{"2022":48965,"2023":76778,"2021":85459,"2025":35930,"2018":48554,"2024":75473,"2020":89067,"2019":44888,"2015":22890,"2010":14950,"2016":29238,"2012":54062,"2017":136300,"2014":14447,"2013":20592,"2011":40589,"2008":25513,"2009":37219,"2006":6254,"2005":10699,"2007":11858,"2004":22,"2003":211,"2002":450}},{"snapshot_date":"2025-08","composition":{"2021":84949,"2025":39378,"2022":48746,"2023":76487,"2018":48540,"2019":44846,"2024":75168,"2020":88972,"2015":22809,"2010":14906,"2016":29174,"2012":54022,"2017":136261,"2014":14425,"2013":20457,"2011":40518,"2006":6254,"2008":25462,"2009":36955,"2005":10699,"2007":11844,"2004":22,"2003":211,"2002":450}},{"snapshot_date":"2025-09","composition":{"2022":48341,"2023":76041,"2021":81877,"2025":44888,"2018":48164,"2019":44345,"2020":88807,"2024":74576,"2015":22727,"2012":53973,"2016":28894,"2017":136220,"2010":14874,"2014":14395,"2013":20405,"2011":40432,"2008":25414,"2009":36912,"2005":10693,"2007":11841,"2006":6223,"2004":22,"2003":211,"2002":450}},{"snapshot_date":"2025-10","composition":{"2022":48193,"2023":75293,"2018":47992,"2019":44311,"2021":80322,"2025":53114,"2020":88604,"2024":72867,"2015":22561,"2010":14854,"2016":28862,"2012":53957,"2017":136093,"2014":14375,"2013":20347,"2011":40375,"2008":25189,"2009":36813,"2006":6195,"2005":10677,"2007":11833,"2004":17,"2002":442,"2003":46}},{"snapshot_date":"2025-11","composition":{"2022":47944,"2023":74782,"2021":79606,"2025":59992,"2018":47831,"2020":88251,"2024":72354,"2019":44191,"2015":22537,"2010":14843,"2016":28752,"2012":53950,"2017":135910,"2014":14359,"2013":20274,"2011":40175,"2006":6059,"2008":25102,"2009":36633,"2005":10673,"2007":11780,"2004":17,"2002":439,"2003":46}},{"snapshot_date":"2025-12","composition":{"2022":47372,"2023":72787,"2018":47273,"2025":66882,"2020":83382,"2021":77625,"2024":70724,"2019":42674,"2015":21873,"2010":14549,"2016":28263,"2012":53465,"2017":134788,"2014":13988,"2013":19504,"2011":39948,"2008":24669,"2009":34785,"2006":4411,"2005":6078,"2007":9963,"2004":17,"2002":439,"2003":46,"2026":4}},{"snapshot_date":"2026-01","composition":{"2022":47319,"2023":72372,"2021":76903,"2025":66534,"2018":47222,"2019":42563,"2020":83215,"2024":70327,"2015":21829,"2026":5958,"2010":14537,"2016":28262,"2012":53457,"2017":134734,"2014":13972,"2013":19463,"2011":39886,"2006":4399,"2005":6067,"2008":24649,"2007":9953,"2009":34693,"2004":17,"2002":439,"2003":46}},{"snapshot_date":"2026-02","composition":{"2021":76792,"2025":66399,"2022":47279,"2023":72221,"2018":46641,"2020":82838,"2024":69905,"2019":42547,"2015":21821,"2026":7958,"2010":14529,"2016":28172,"2012":53453,"2017":134717,"2014":13935,"2013":19456,"2011":39882,"2008":24622,"2009":34684,"2006":4398,"2005":6064,"2007":9946,"2004":17,"2002":439,"2003":46}},{"snapshot_date":"2026-03","composition":{"2021":76470,"2025":66092,"2022":47222,"2023":72001,"2018":46580,"2020":82576,"2024":69743,"2019":42499,"2015":21647,"2026":12114,"2010":14494,"2016":28145,"2012":53430,"2017":134676,"2014":13906,"2013":19444,"2011":39765,"2008":24608,"2009":34566,"2006":4397,"2005":6057,"2007":9938,"2004":17,"2002":439,"2003":46}},{"snapshot_date":"2026-04","composition":{"2022":46950,"2023":71633,"2021":76068,"2025":65188,"2018":46533,"2020":82479,"2024":69500,"2019":42465,"2015":21593,"2026":22321,"2010":14488,"2016":28127,"2012":53406,"2017":134598,"2014":13899,"2013":19408,"2011":39360,"2008":24570,"2009":34442,"2006":4392,"2005":6057,"2007":9933,"2004":17,"2002":439,"2003":46}},{"snapshot_date":"2026-05","composition":{"2021":75838,"2025":64924,"2018":46067,"2023":71355,"2022":44333,"2020":82388,"2024":69295,"2019":40932,"2015":21574,"2026":30358,"2010":14393,"2016":28123,"2012":53303,"2017":134591,"2014":13881,"2013":19370,"2011":39224,"2008":24569,"2009":34416,"2006":4384,"2005":6057,"2007":9932,"2004":17,"2002":439,"2003":46}}],"fossils":{"genesis":{"timestamp":1009260221,"file":"scipy_test/setup_scipy_test.py","content":"#!/usr/bin/env python","year":"2001","commit":"74a4958","view_commit":"74a4958b94e07b2e3218741fb52d32e87308d62e","line":1},"survivor":{"timestamp":1017446578,"file":"numpy/lib/_polynomial_impl.py","content":"def poly(seq_of_zeros):","year":"2002","commit":"0562713","view_commit":"main","line":40}}} \ No newline at end of file diff --git a/data/react_data.json b/data/react_data.json new file mode 100644 index 0000000..74cb247 --- /dev/null +++ b/data/react_data.json @@ -0,0 +1 @@ +{"snapshots":[{"snapshot_date":"2013-06","composition":{"2013":49299}},{"snapshot_date":"2013-09","composition":{"2013":61256}},{"snapshot_date":"2013-12","composition":{"2013":130512,"2014":6}},{"snapshot_date":"2014-03","composition":{"2014":23020,"2013":123687}},{"snapshot_date":"2014-06","composition":{"2013":122493,"2014":30244}},{"snapshot_date":"2014-09","composition":{"2013":120244,"2014":44148}},{"snapshot_date":"2014-12","composition":{"2013":114261,"2014":70090}},{"snapshot_date":"2015-03","composition":{"2014":67640,"2015":84107,"2013":110637}},{"snapshot_date":"2015-06","composition":{"2014":63060,"2013":107612,"2015":103761}},{"snapshot_date":"2015-09","composition":{"2014":61104,"2015":128842,"2013":99577}},{"snapshot_date":"2015-12","composition":{"2014":65919,"2013":97668,"2015":152156}},{"snapshot_date":"2016-03","composition":{"2013":97380,"2015":151704,"2014":59135,"2016":15687}},{"snapshot_date":"2016-06","composition":{"2016":60253,"2013":92596,"2015":144933,"2014":58094}},{"snapshot_date":"2016-09","composition":{"2014":56579,"2016":107146,"2015":127449,"2013":90605}},{"snapshot_date":"2016-12","composition":{"2014":53620,"2016":171100,"2015":93767,"2013":87505}},{"snapshot_date":"2017-03","composition":{"2016":153719,"2017":40564,"2013":85813,"2015":90162,"2014":51722}},{"snapshot_date":"2017-06","composition":{"2016":112177,"2017":100865,"2013":80747,"2015":85364,"2014":49079}},{"snapshot_date":"2017-09","composition":{"2014":42959,"2016":95520,"2017":170284,"2015":78029,"2013":71880}},{"snapshot_date":"2017-12","composition":{"2016":32719,"2017":141395,"2014":3091,"2013":4275,"2015":9115}},{"snapshot_date":"2018-03","composition":{"2017":132926,"2013":4240,"2015":8893,"2014":2840,"2016":31430,"2018":29528}},{"snapshot_date":"2018-06","composition":{"2016":28845,"2017":125268,"2018":50483,"2015":8742,"2013":4144,"2014":2801}},{"snapshot_date":"2018-09","composition":{"2017":121415,"2018":89215,"2016":27784,"2013":4030,"2015":8679,"2014":2750}},{"snapshot_date":"2018-12","composition":{"2017":119427,"2018":110439,"2016":27687,"2014":2745,"2013":4028,"2015":8663}},{"snapshot_date":"2019-03","composition":{"2019":27130}},{"snapshot_date":"2019-06","composition":{"2017":117513,"2015":8651,"2016":26894,"2018":88585,"2019":65595,"2013":4006,"2014":2733}},{"snapshot_date":"2019-09","composition":{"2017":116862,"2019":154937,"2018":85418,"2016":26444,"2015":8633,"2013":4006,"2014":2731}},{"snapshot_date":"2019-12","composition":{"2017":115372,"2019":178970,"2018":80928,"2016":26181,"2015":8651,"2014":2731,"2013":4002}},{"snapshot_date":"2020-03","composition":{"2016":25817,"2018":75202,"2020":42887,"2017":112448,"2019":158206,"2013":3752,"2015":8488,"2014":2625}},{"snapshot_date":"2020-06","composition":{"2019":149670,"2020":109834,"2016":24821,"2018":74006,"2017":111520,"2015":8475,"2014":2635,"2013":3705}},{"snapshot_date":"2020-09","composition":{"2019":132657,"2020":140316,"2017":109379,"2018":71429,"2016":24599,"2015":8365,"2014":2109,"2013":3381}},{"snapshot_date":"2020-12","composition":{"2019":127254,"2020":182464,"2017":106722,"2018":71568,"2013":3381,"2015":8365,"2014":2109,"2016":24570}},{"snapshot_date":"2021-03","composition":{"2019":122222,"2021":25610,"2020":172383,"2017":106608,"2018":70547,"2016":24558,"2015":8363,"2013":3381,"2014":2109}},{"snapshot_date":"2021-06","composition":{"2019":116227,"2021":70698,"2017":104705,"2020":162225,"2018":64628,"2016":24421,"2015":8330,"2014":2097,"2013":3366}},{"snapshot_date":"2021-09","composition":{"2020":153417,"2019":114206,"2021":117474,"2017":104170,"2018":63959,"2015":8185,"2016":24311,"2014":2047,"2013":3347}},{"snapshot_date":"2021-12","composition":{"2021":136954,"2017":103766,"2019":112937,"2020":150902,"2018":63738,"2014":2047,"2016":24244,"2015":8119,"2013":3347}},{"snapshot_date":"2022-03","composition":{"2019":107105,"2021":130490,"2020":147765,"2017":103594,"2015":8093,"2016":24233,"2022":24396,"2018":63526,"2014":2036,"2013":3347}},{"snapshot_date":"2022-06","composition":{"2020":145774,"2019":105925,"2021":127685,"2017":103566,"2022":49310,"2018":62370,"2015":8081,"2016":24217,"2014":2036,"2013":3343}},{"snapshot_date":"2022-09","composition":{"2016":24202,"2018":61123,"2020":139446,"2017":102772,"2019":100139,"2021":124382,"2022":81289,"2014":2036,"2013":3343,"2015":8069}},{"snapshot_date":"2022-12","composition":{"2019":97166,"2021":116918,"2017":101538,"2022":90700,"2020":119753,"2018":59933,"2015":8036,"2016":24147,"2014":2030,"2013":3308}},{"snapshot_date":"2023-03","composition":{"2019":92349,"2021":109360,"2023":49082,"2020":98523,"2014":1943,"2017":97452,"2022":74008,"2016":23525,"2018":57598,"2013":3211,"2015":7685}},{"snapshot_date":"2023-06","composition":{"2019":90609,"2021":105316,"2023":71271,"2017":96874,"2022":71044,"2020":95370,"2016":23444,"2018":57130,"2015":7678,"2014":1942,"2013":3203}},{"snapshot_date":"2023-09","composition":{"2020":94881,"2019":89683,"2021":104279,"2023":89091,"2017":96761,"2022":70205,"2016":23437,"2018":56929,"2014":1942,"2015":7678,"2013":3203}},{"snapshot_date":"2023-12","composition":{"2023":322197,"2021":58572,"2022":9860}},{"snapshot_date":"2024-03","composition":{"2019":78286,"2021":98780,"2023":91993,"2017":92511,"2015":6865,"2016":21601,"2020":89985,"2013":2982,"2014":1611,"2024":56403,"2018":52282,"2022":64120}},{"snapshot_date":"2024-06","composition":{"2024":146695,"2021":109652,"2017":91402,"2019":76480,"2022":69399,"2023":387233,"2020":86381,"2013":2827,"2015":6451,"2014":1434,"2016":21189,"2018":51177}},{"snapshot_date":"2024-09","composition":{"2016":21077,"2018":50727,"2020":84491,"2022":66008,"2024":207309,"2017":91319,"2019":73735,"2021":106604,"2023":372741,"2014":1434,"2015":6450,"2013":2827}},{"snapshot_date":"2024-12","composition":{"2020":83431,"2021":105682,"2024":235398,"2016":21010,"2018":49947,"2022":64957,"2017":91080,"2019":72870,"2023":369720,"2015":6409,"2013":2798,"2014":1424}},{"snapshot_date":"2025-01","composition":{"2024":228979,"2019":72002,"2021":105295,"2023":146990,"2020":81905,"2017":90754,"2015":6344,"2016":20945,"2013":2794,"2014":1412,"2018":48950,"2022":64653,"2025":30519}},{"snapshot_date":"2025-02","composition":{"2024":226806,"2019":70686,"2021":105235,"2023":145909,"2020":81740,"2017":90722,"2015":6341,"2016":20945,"2018":48854,"2022":64519,"2025":43401,"2013":2793,"2014":1412}},{"snapshot_date":"2025-03","composition":{"2024":221556,"2019":70526,"2021":105086,"2023":144980,"2020":81491,"2017":90713,"2015":6341,"2016":20943,"2018":48777,"2022":63742,"2025":76772,"2013":2793,"2014":1412}},{"snapshot_date":"2025-04","composition":{"2024":220576,"2019":70454,"2025":94566,"2021":103830,"2023":143787,"2020":81428,"2017":90707,"2015":6341,"2016":20943,"2018":48770,"2022":63508,"2014":1412,"2013":2793}},{"snapshot_date":"2025-05","composition":{"2024":218220,"2019":70363,"2025":112419,"2021":103777,"2023":142803,"2020":81289,"2017":90702,"2015":6341,"2016":20942,"2018":48749,"2022":63378,"2014":1412,"2013":2793}},{"snapshot_date":"2025-06","composition":{"2024":215878,"2019":70222,"2025":137311,"2021":103737,"2023":142387,"2017":90701,"2015":6341,"2016":20939,"2020":81261,"2018":48706,"2022":63159,"2013":2793,"2014":1412}},{"snapshot_date":"2025-07","composition":{"2024":212777,"2019":69634,"2025":153753,"2021":103393,"2023":141860,"2020":81061,"2017":90690,"2015":6334,"2016":20929,"2014":1412,"2013":2793,"2018":48343,"2022":62740}},{"snapshot_date":"2025-08","composition":{"2024":209351,"2019":69500,"2025":185075,"2021":103254,"2020":80847,"2016":20929,"2018":48330,"2022":62013,"2017":90685,"2023":140410,"2014":1412,"2015":6334,"2013":2793}},{"snapshot_date":"2025-09","composition":{"2024":208280,"2019":69355,"2025":199735,"2021":103075,"2020":80758,"2017":90684,"2015":6334,"2016":20929,"2014":1412,"2018":48324,"2022":61753,"2023":139982,"2013":2793}},{"snapshot_date":"2025-10","composition":{"2024":207431,"2019":69272,"2025":209366,"2021":103037,"2020":80677,"2017":90684,"2015":6334,"2016":20929,"2013":2793,"2014":1412,"2018":48322,"2022":61695,"2023":139651}},{"snapshot_date":"2025-11","composition":{"2019":69017,"2025":220684,"2021":103016,"2024":205775,"2017":90652,"2015":6334,"2016":20915,"2020":80599,"2013":2793,"2014":1412,"2018":48289,"2022":61679,"2023":136904}},{"snapshot_date":"2025-12","composition":{"2024":205425,"2019":68968,"2025":226961,"2021":103003,"2020":80579,"2017":90637,"2015":4434,"2016":20915,"2013":2793,"2014":1412,"2018":48289,"2022":61658,"2023":136615}},{"snapshot_date":"2026-01","composition":{"2026":19721,"2024":203679,"2019":68844,"2025":223578,"2021":102930,"2020":80546,"2017":90634,"2015":4433,"2016":20907,"2013":2793,"2014":1412,"2018":48210,"2022":61557,"2023":136219}},{"snapshot_date":"2026-02","composition":{"2026":33082,"2024":195521,"2019":68808,"2025":215585,"2021":102920,"2020":80409,"2014":1412,"2013":2792,"2015":4433,"2016":20907,"2018":48210,"2022":61380,"2017":90634,"2023":134343}},{"snapshot_date":"2026-03","composition":{"2026":38141,"2024":195277,"2019":68486,"2025":215136,"2021":102785,"2020":80280,"2017":90630,"2015":4433,"2016":20907,"2013":2792,"2014":1412,"2018":48104,"2022":61321,"2023":134227}},{"snapshot_date":"2026-04","composition":{"2026":45339,"2024":194885,"2019":68005,"2025":214779,"2021":102512,"2017":89469,"2015":4433,"2016":20263,"2020":79804,"2018":47417,"2022":61254,"2023":133996,"2014":1412,"2013":2792}},{"snapshot_date":"2026-05","composition":{"2026":48526,"2024":194575,"2019":67825,"2025":214195,"2021":102171,"2020":79676,"2017":89431,"2015":4433,"2016":20263,"2018":46991,"2022":61249,"2023":133881,"2014":1412,"2013":2792}}],"fossils":{"genesis":{"timestamp":1369771850,"file":"vendor/jasmine/HtmlReporter.js","content":"var jasmine = require(\"./jasmine\");","year":"2013","commit":"f8af932","view_commit":"f8af93237adaa7c02df9edcbfccd07e6fdaaa0ed","line":1},"survivor":{"timestamp":1369856771,"file":".editorconfig","content":"root = true","year":"2013","commit":"75897c2","view_commit":"main","line":2}}} \ No newline at end of file diff --git a/data/zed_data.json b/data/zed_data.json new file mode 100644 index 0000000..2a52555 --- /dev/null +++ b/data/zed_data.json @@ -0,0 +1 @@ +{"snapshots":[{"snapshot_date":"2021-03","composition":{"2021":25386}},{"snapshot_date":"2021-06","composition":{"2021":44898}},{"snapshot_date":"2021-09","composition":{"2021":76985}},{"snapshot_date":"2021-12","composition":{"2021":94004}},{"snapshot_date":"2022-03","composition":{"2021":73693,"2022":388255,"2026":4}},{"snapshot_date":"2022-06","composition":{"2021":51112,"2022":439373,"2026":4}},{"snapshot_date":"2022-09","composition":{"2021":47923,"2022":461986,"2026":4}},{"snapshot_date":"2022-12","composition":{"2022":491696,"2021":42941,"2026":4}},{"snapshot_date":"2023-03","composition":{"2022":476272,"2023":50012,"2021":39796,"2026":4}},{"snapshot_date":"2023-06","composition":{"2023":113736,"2022":453849,"2021":36084,"2026":4}},{"snapshot_date":"2023-09","composition":{"2023":201675,"2022":444799,"2021":35314,"2026":4}},{"snapshot_date":"2023-12","composition":{"2023":568550,"2022":439807,"2021":34913,"2026":8}},{"snapshot_date":"2024-03","composition":{"2024":190937,"2023":147177,"2022":411340,"2021":22875,"2026":113}},{"snapshot_date":"2024-06","composition":{"2024":309661,"2023":130643,"2022":66647,"2021":21876,"2026":151}},{"snapshot_date":"2024-09","composition":{"2024":441405,"2023":116364,"2021":20941,"2022":61998,"2026":162}},{"snapshot_date":"2024-12","composition":{"2024":522212,"2023":111939,"2021":19618,"2022":58843,"2026":170}},{"snapshot_date":"2025-01","composition":{"2025":87396,"2024":473046,"2023":106560,"2022":56478,"2021":19030,"2026":175}},{"snapshot_date":"2025-02","composition":{"2025":136662,"2024":445718,"2023":104630,"2021":18947,"2022":55697,"2026":182}},{"snapshot_date":"2025-03","composition":{"2025":232121,"2024":420152,"2023":100255,"2022":53002,"2021":18700,"2026":182}},{"snapshot_date":"2025-04","composition":{"2026":189,"2025":295774,"2024":405727,"2023":98899,"2022":51571,"2021":18379}},{"snapshot_date":"2025-05","composition":{"2026":189,"2025":383631,"2024":389034,"2023":97957,"2022":51077,"2021":18342}},{"snapshot_date":"2025-06","composition":{"2026":194,"2025":435521,"2024":381915,"2023":96914,"2021":18310,"2022":50493}},{"snapshot_date":"2025-07","composition":{"2026":205,"2025":505702,"2024":370960,"2023":96086,"2021":18028,"2022":50134}},{"snapshot_date":"2025-08","composition":{"2024":342196,"2025":587373,"2023":94193,"2026":210,"2022":49275,"2021":17791}},{"snapshot_date":"2025-09","composition":{"2026":217,"2025":647340,"2024":322913,"2023":89361,"2022":48437,"2021":17312}},{"snapshot_date":"2025-10","composition":{"2026":216,"2025":785509,"2024":313682,"2023":87171,"2022":47531,"2021":16776}},{"snapshot_date":"2025-11","composition":{"2026":216,"2025":829198,"2024":308252,"2023":85866,"2022":46995,"2021":16515}},{"snapshot_date":"2025-12","composition":{"2024":303394,"2026":3260,"2025":878595,"2023":84991,"2022":46559,"2021":16345}},{"snapshot_date":"2026-01","composition":{"2025":827305,"2024":296062,"2023":82999,"2026":155530,"2022":46183,"2021":16051}},{"snapshot_date":"2026-02","composition":{"2025":792544,"2024":287028,"2023":81769,"2026":287243,"2022":44769,"2021":15713}},{"snapshot_date":"2026-03","composition":{"2025":759242,"2024":272004,"2023":80746,"2026":424335,"2022":44357,"2021":15498}},{"snapshot_date":"2026-04","composition":{"2024":267533,"2025":738032,"2026":536594,"2023":78982,"2022":43488,"2021":15278}},{"snapshot_date":"2026-05","composition":{"2026":647040,"2025":678530,"2024":262431,"2023":77869,"2022":41743,"2021":15214}}],"fossils":{"genesis":{"timestamp":1613862336,"file":"gpui/src/executor.rs","content":"// #[cfg(not(test))]","year":"2021","commit":"222f9d3","view_commit":"222f9d373df677d7c5f8427984b4206f36f53a2a","line":1},"survivor":{"timestamp":1613840554,"file":"Cargo.toml","content":"[workspace]","year":"2021","commit":"b400449","view_commit":"main","line":1}}} \ No newline at end of file diff --git a/scripts/add_fossils.py b/scripts/add_fossils.py index 06a3590..c59d5a4 100644 --- a/scripts/add_fossils.py +++ b/scripts/add_fossils.py @@ -32,7 +32,7 @@ if _SCRIPTS_DIR not in sys.path: sys.path.insert(0, _SCRIPTS_DIR) -from _utils import get_default_branch, run_command, load_config, remove_path +from _utils import get_default_branch, load_config, remove_path, run_command logger = logging.getLogger(__name__) @@ -87,7 +87,11 @@ def _blame_file(repo_path: str | Path, file_path: str, view_commit: str = "") -> fossil["line"] = line_num else: parts = line.split(" ") - if parts and len(parts[0]) in (40, 64) and all(c in "0123456789abcdef" for c in parts[0].lower()): + if ( + parts + and len(parts[0]) in (40, 64) + and all(c in "0123456789abcdef" for c in parts[0].lower()) + ): current_commit_data = {"commit": parts[0]} elif line.startswith("author-time ") and len(parts) >= 2: try: @@ -99,7 +103,10 @@ def _blame_file(repo_path: str | Path, file_path: str, view_commit: str = "") -> def _blame_files_parallel( - repo_path: str | Path, files: list[str], view_commit: str = "", max_workers: int = 20 + repo_path: str | Path, + files: list[str], + view_commit: str = "", + max_workers: int = 20, ) -> dict: """Blame a list of files in parallel and return the single oldest fossil found.""" global_oldest = _blank_fossil() diff --git a/scripts/cleanup_data.py b/scripts/cleanup_data.py index 2180312..8db9cb4 100644 --- a/scripts/cleanup_data.py +++ b/scripts/cleanup_data.py @@ -57,7 +57,7 @@ def cleanup_data(data_dir: str) -> bool: max_year = int(snapshot_date[:4]) composition = snapshot.get("composition", {}) keys_to_remove = [ - year for year in composition.keys() if int(year) > max_year + year for year in composition.keys() if int(year) > max_year ] for key in keys_to_remove: del composition[key] @@ -78,6 +78,7 @@ def cleanup_data(data_dir: str) -> bool: return had_failures + def main(): config = load_config() data_dir = config.get("dataDir", "./data") @@ -85,5 +86,6 @@ def main(): print("One or more files failed to clean up. Exiting non-zero.") sys.exit(1) + if __name__ == "__main__": main() diff --git a/theseus.config.json b/theseus.config.json index d9345ec..709df00 100644 --- a/theseus.config.json +++ b/theseus.config.json @@ -12,11 +12,6 @@ "title": "LangChain v0.1", "description": "First stable architecture release with standardized core components." }, - { - "date": "2024-06", - "title": "Monorepo & LangGraph Cloud", - "description": "Massive expansion with LangGraph Cloud and integration packages." - }, { "date": "2025-10", "title": "LangChain v1.0 (The Pruning)", @@ -46,19 +41,9 @@ "description": "Introduced Hooks for state management without classes." }, { - "date": "2024-04", + "date": "2024-06", "title": "React 19 major update", "description": "Added Server Components and new form handling." - }, - { - "date": "2019-06", - "title": "Data quirk: code looks different", - "description": "Data quirk: Internal code reorganization." - }, - { - "date": "2023-06", - "title": "Data quirk: lots missing", - "description": "Data quirk: Missing historical data." } ] }, @@ -73,21 +58,11 @@ "title": "NumPy is born", "description": "Created by merging Numeric and Numarray." }, - { - "date": "2006-03", - "title": "NumPy 1.0 released", - "description": "Official 1.0 release as a standalone library." - }, { "date": "2013-04", "title": "Python 2 & 3 unified", "description": "Unified support for Python 2 and 3." }, - { - "date": "2017-06", - "title": "Major features added", - "description": "Major update with 50+ new functions." - }, { "date": "2024-06", "title": "NumPy 2.0 released", @@ -101,25 +76,10 @@ "description": "High-performance, GPU-accelerated code editor for teamwork.", "repo": "zed-industries/zed", "milestones": [ - { - "date": "2023-03", - "title": "Zed Beta Launch", - "description": "High-performance code editor enters public beta on macOS." - }, { "date": "2023-12", "title": "GPUI2 Transition and Open Sourced", "description": "Major rewrite of the UI framework for 120 FPS performance and The Zed source code is made available to the public." - }, - { - "date": "2024-06", - "title": "Linux Release", - "description": "Official launch of Zed for Linux after months of development." - }, - { - "date": "2024-09", - "title": "Zed AI Launch", - "description": "Introduction of native AI features and the Zed AI ecosystem." } ] }, @@ -147,4 +107,4 @@ ] } ] -} \ No newline at end of file +} From d3d8f138cacbc15b50fe278ed60ab1a057fdc64c Mon Sep 17 00:00:00 2001 From: Asif Sayyed Date: Sun, 31 May 2026 16:02:23 +0530 Subject: [PATCH 6/9] #33 fixing the test file --- data/langchain_data.json | 2 +- data/numpy_data.json | 2 +- data/react_data.json | 2 +- data/zed_data.json | 2 +- scripts/analyse_repository.py | 12 ++++++++++++ 5 files changed, 16 insertions(+), 4 deletions(-) diff --git a/data/langchain_data.json b/data/langchain_data.json index 8f02778..c576733 100644 --- a/data/langchain_data.json +++ b/data/langchain_data.json @@ -1 +1 @@ -{"snapshots":[{"snapshot_date":"2022-12","composition":{"2022":33781}},{"snapshot_date":"2023-03","composition":{"2022":23173,"2023":99857}},{"snapshot_date":"2023-06","composition":{"2023":673974,"2022":18477,"2026":4}},{"snapshot_date":"2023-09","composition":{"2023":737974,"2022":15979,"2026":4}},{"snapshot_date":"2023-12","composition":{"2023":1060355,"2022":11100,"2026":8}},{"snapshot_date":"2024-03","composition":{"2023":889068,"2022":10561,"2024":491964,"2026":8}},{"snapshot_date":"2024-06","composition":{"2024":566702,"2023":709506,"2022":7565,"2026":8}},{"snapshot_date":"2024-09","composition":{"2024":658281,"2023":662951,"2022":7142,"2026":8}},{"snapshot_date":"2024-12","composition":{"2024":720724,"2023":544255,"2022":7122,"2026":8}},{"snapshot_date":"2025-01","composition":{"2023":542769,"2024":707768,"2022":7106,"2025":56667,"2026":8}},{"snapshot_date":"2025-02","composition":{"2024":657110,"2023":533502,"2022":6529,"2025":124263,"2026":8}},{"snapshot_date":"2025-03","composition":{"2025":156895,"2023":529492,"2024":639961,"2022":6506,"2026":8}},{"snapshot_date":"2025-04","composition":{"2025":144495,"2023":315832,"2024":448736,"2022":5672,"2026":4}},{"snapshot_date":"2025-05","composition":{"2025":158158,"2023":313824,"2024":444368,"2022":5671,"2026":4}},{"snapshot_date":"2025-06","composition":{"2023":313268,"2024":441875,"2022":5671,"2025":170299,"2026":4}},{"snapshot_date":"2025-07","composition":{"2023":309042,"2025":203651,"2024":435352,"2022":5429,"2026":4}},{"snapshot_date":"2025-08","composition":{"2025":222636,"2023":308685,"2024":433568,"2022":5427,"2026":4}},{"snapshot_date":"2025-09","composition":{"2023":306919,"2025":253228,"2024":429514,"2022":5409,"2026":4}},{"snapshot_date":"2025-10","composition":{"2025":213219,"2023":83609,"2024":138310,"2022":3632,"2026":2}},{"snapshot_date":"2025-11","composition":{"2025":213381,"2023":83271,"2024":137738,"2022":3489,"2026":2}},{"snapshot_date":"2025-12","composition":{"2025":221811,"2023":83009,"2024":136936,"2022":3485,"2026":2}},{"snapshot_date":"2026-01","composition":{"2025":207615,"2026":12524,"2023":81152,"2024":128469,"2022":3485}},{"snapshot_date":"2026-02","composition":{"2025":198810,"2023":81133,"2026":35849,"2024":126788,"2022":3484}},{"snapshot_date":"2026-03","composition":{"2025":195107,"2023":81091,"2024":126636,"2026":47777,"2022":3475}},{"snapshot_date":"2026-04","composition":{"2025":194170,"2026":71165,"2023":81047,"2024":126480,"2022":3475}},{"snapshot_date":"2026-05","composition":{"2025":192543,"2026":86935,"2023":81024,"2024":125964,"2022":3475}}],"fossils":{"genesis":{"timestamp":1666666586,"file":"examples/natbot.py","content":"\"\"\"Run NatBot.\"\"\"","year":"2022","commit":"1ef3ab4","view_commit":"1ef3ab4d0e663be147a5bcf542045e1f4a065778","line":1},"survivor":{"timestamp":1666648275,"file":".github/workflows/_lint.yml","content":"jobs:","year":"2022","commit":"18aeb72","view_commit":"master","line":33}}} \ No newline at end of file +{"snapshots":[{"snapshot_date":"2022-12","composition":{"2022":33781}},{"snapshot_date":"2023-03","composition":{"2022":23173,"2023":99857}},{"snapshot_date":"2023-06","composition":{"2023":673974,"2022":18477}},{"snapshot_date":"2023-09","composition":{"2023":737974,"2022":15979}},{"snapshot_date":"2023-12","composition":{"2023":1060355,"2022":11100}},{"snapshot_date":"2024-03","composition":{"2023":889068,"2022":10561,"2024":491964}},{"snapshot_date":"2024-06","composition":{"2024":566702,"2023":709506,"2022":7565}},{"snapshot_date":"2024-09","composition":{"2024":658281,"2023":662951,"2022":7142}},{"snapshot_date":"2024-12","composition":{"2024":720724,"2023":544255,"2022":7122}},{"snapshot_date":"2025-01","composition":{"2023":542769,"2024":707768,"2022":7106,"2025":56667}},{"snapshot_date":"2025-02","composition":{"2024":657110,"2023":533502,"2022":6529,"2025":124263}},{"snapshot_date":"2025-03","composition":{"2025":156895,"2023":529492,"2024":639961,"2022":6506}},{"snapshot_date":"2025-04","composition":{"2025":144495,"2023":315832,"2024":448736,"2022":5672}},{"snapshot_date":"2025-05","composition":{"2025":158158,"2023":313824,"2024":444368,"2022":5671}},{"snapshot_date":"2025-06","composition":{"2023":313268,"2024":441875,"2022":5671,"2025":170299}},{"snapshot_date":"2025-07","composition":{"2023":309042,"2025":203651,"2024":435352,"2022":5429}},{"snapshot_date":"2025-08","composition":{"2025":222636,"2023":308685,"2024":433568,"2022":5427}},{"snapshot_date":"2025-09","composition":{"2023":306919,"2025":253228,"2024":429514,"2022":5409}},{"snapshot_date":"2025-10","composition":{"2025":213219,"2023":83609,"2024":138310,"2022":3632}},{"snapshot_date":"2025-11","composition":{"2025":213381,"2023":83271,"2024":137738,"2022":3489}},{"snapshot_date":"2025-12","composition":{"2025":221811,"2023":83009,"2024":136936,"2022":3485}},{"snapshot_date":"2026-01","composition":{"2025":207615,"2026":12524,"2023":81152,"2024":128469,"2022":3485}},{"snapshot_date":"2026-02","composition":{"2025":198810,"2023":81133,"2026":35849,"2024":126788,"2022":3484}},{"snapshot_date":"2026-03","composition":{"2025":195107,"2023":81091,"2024":126636,"2026":47777,"2022":3475}},{"snapshot_date":"2026-04","composition":{"2025":194170,"2026":71165,"2023":81047,"2024":126480,"2022":3475}},{"snapshot_date":"2026-05","composition":{"2025":192543,"2026":86935,"2023":81024,"2024":125964,"2022":3475}}],"fossils":{"genesis":{"timestamp":1666666586,"file":"examples/natbot.py","content":"\"\"\"Run NatBot.\"\"\"","year":"2022","commit":"1ef3ab4","view_commit":"1ef3ab4d0e663be147a5bcf542045e1f4a065778","line":1},"survivor":{"timestamp":1666648275,"file":".github/workflows/_lint.yml","content":"jobs:","year":"2022","commit":"18aeb72","view_commit":"master","line":33}}} \ No newline at end of file diff --git a/data/numpy_data.json b/data/numpy_data.json index 3e60189..98a7e9a 100644 --- a/data/numpy_data.json +++ b/data/numpy_data.json @@ -1 +1 @@ -{"snapshots":[{"snapshot_date":"2001-12","composition":{"2001":1865}},{"snapshot_date":"2002-03","composition":{"2002":94339,"2001":1472}},{"snapshot_date":"2002-06","composition":{"2002":102869,"2001":1179}},{"snapshot_date":"2002-09","composition":{"2002":130360,"2001":1167}},{"snapshot_date":"2002-12","composition":{"2002":132966,"2001":1130}},{"snapshot_date":"2003-03","composition":{"2002":132607,"2003":2305,"2001":1052}},{"snapshot_date":"2003-06","composition":{"2003":2688,"2002":132569,"2001":1047}},{"snapshot_date":"2003-09","composition":{"2003":3793,"2002":132461,"2001":1036}},{"snapshot_date":"2003-12","composition":{"2002":131017,"2003":5328,"2001":1009}},{"snapshot_date":"2004-03","composition":{"2002":129547,"2003":3964,"2004":5960,"2001":449}},{"snapshot_date":"2004-06","composition":{"2004":9689,"2002":129500,"2003":3916,"2001":443}},{"snapshot_date":"2004-09","composition":{"2004":10562,"2002":128923,"2003":3868,"2001":443}},{"snapshot_date":"2004-12","composition":{"2002":128551,"2003":3680,"2004":13272,"2001":437}},{"snapshot_date":"2005-03","composition":{"2002":128546,"2003":3676,"2004":13125,"2005":352,"2001":437}},{"snapshot_date":"2005-06","composition":{"2004":13106,"2005":1801,"2002":128527,"2003":3655,"2001":437}},{"snapshot_date":"2005-09","composition":{"2005":150609,"2002":120948,"2004":2743,"2003":2178}},{"snapshot_date":"2005-12","composition":{"2005":192483,"2002":95435,"2004":2503,"2003":1868}},{"snapshot_date":"2006-03","composition":{"2006":24364,"2005":147165,"2002":1852,"2004":152,"2003":357}},{"snapshot_date":"2006-06","composition":{"2006":42885,"2005":144356,"2002":1837,"2004":148,"2003":355}},{"snapshot_date":"2006-09","composition":{"2005":135193,"2006":106620,"2002":1809,"2004":145,"2003":339}},{"snapshot_date":"2006-12","composition":{"2006":82177,"2005":134843,"2002":1803,"2004":144,"2003":339}},{"snapshot_date":"2007-03","composition":{"2005":134590,"2006":76164,"2007":13620,"2002":1749,"2004":144,"2003":333}},{"snapshot_date":"2007-06","composition":{"2005":133132,"2006":74427,"2007":21742,"2002":1747,"2004":144,"2003":333}},{"snapshot_date":"2007-09","composition":{"2006":61136,"2005":124879,"2007":49906,"2002":1742,"2004":144,"2003":333}},{"snapshot_date":"2007-12","composition":{"2006":60016,"2007":58304,"2005":123821,"2002":1739,"2004":144,"2003":332}},{"snapshot_date":"2008-03","composition":{"2005":122014,"2007":61831,"2006":58723,"2008":11648,"2002":1736,"2004":144,"2003":328}},{"snapshot_date":"2008-06","composition":{"2005":120599,"2006":52922,"2008":35857,"2007":54121,"2002":1602,"2004":141,"2003":316}},{"snapshot_date":"2008-09","composition":{"2005":119750,"2006":33511,"2008":136007,"2007":46987,"2002":1353,"2004":138,"2003":281}},{"snapshot_date":"2008-12","composition":{"2005":118888,"2006":33032,"2007":45694,"2008":162098,"2002":1352,"2004":138,"2003":281}},{"snapshot_date":"2009-03","composition":{"2005":118010,"2006":31300,"2007":40897,"2008":155221,"2009":45419,"2002":1351,"2004":138,"2003":278}},{"snapshot_date":"2009-06","composition":{"2005":116213,"2006":26011,"2008":148517,"2009":87758,"2007":30084,"2002":1346,"2004":138,"2003":278}},{"snapshot_date":"2009-09","composition":{"2005":116065,"2006":25778,"2007":29821,"2008":146944,"2009":100827,"2002":1158,"2004":136,"2003":278}},{"snapshot_date":"2009-12","composition":{"2008":143661,"2009":145005,"2006":25011,"2005":113855,"2007":28736,"2002":1158,"2004":136,"2003":273}},{"snapshot_date":"2010-03","composition":{"2009":136932,"2008":142180,"2005":111850,"2006":24474,"2007":28449,"2010":17329,"2002":1064,"2004":123,"2003":259}},{"snapshot_date":"2010-06","composition":{"2005":111719,"2006":23646,"2007":28023,"2008":141950,"2009":136184,"2010":23093,"2002":1057,"2004":123,"2003":259}},{"snapshot_date":"2010-09","composition":{"2010":28562,"2009":135219,"2006":23589,"2005":111669,"2008":141483,"2007":25037,"2002":1055,"2004":123,"2003":259}},{"snapshot_date":"2010-12","composition":{"2010":43152,"2009":125155,"2006":23469,"2005":111642,"2008":140070,"2007":24957,"2002":1046,"2004":123,"2003":259}},{"snapshot_date":"2011-03","composition":{"2010":49445,"2005":111319,"2006":23132,"2007":23654,"2011":38997,"2009":119000,"2008":138780,"2002":1039,"2004":123,"2003":257}},{"snapshot_date":"2011-06","composition":{"2010":48692,"2011":58195,"2008":138338,"2009":115195,"2006":22996,"2005":111133,"2007":23524,"2002":995,"2004":122,"2003":257}},{"snapshot_date":"2011-09","composition":{"2011":88031,"2010":46487,"2008":137097,"2009":112421,"2006":22699,"2005":110990,"2007":22880,"2002":995,"2004":121,"2003":257}},{"snapshot_date":"2011-12","composition":{"2010":46148,"2011":92965,"2005":110983,"2006":22541,"2007":22841,"2008":137041,"2009":110759,"2002":995,"2004":121,"2003":257}},{"snapshot_date":"2012-03","composition":{"2010":42510,"2011":95632,"2006":22193,"2005":110401,"2007":22505,"2009":108210,"2008":136391,"2012":12933,"2002":995,"2004":121,"2003":257}},{"snapshot_date":"2012-06","composition":{"2010":41767,"2012":20487,"2011":82718,"2006":21976,"2009":107301,"2005":110335,"2007":22419,"2008":136224,"2002":990,"2004":121,"2003":255}},{"snapshot_date":"2012-09","composition":{"2010":41149,"2012":30421,"2011":79589,"2005":110282,"2006":21736,"2007":22074,"2009":105328,"2008":135866,"2002":990,"2004":121,"2003":255}},{"snapshot_date":"2012-12","composition":{"2010":40909,"2012":32983,"2011":79086,"2005":110248,"2006":21734,"2007":22062,"2008":135824,"2009":104923,"2002":990,"2004":121,"2003":255}},{"snapshot_date":"2013-03","composition":{"2012":29693,"2010":40794,"2011":78031,"2013":7110,"2005":109997,"2006":21580,"2007":21690,"2008":134251,"2009":103977,"2002":989,"2004":121,"2003":254}},{"snapshot_date":"2013-06","composition":{"2012":183454,"2010":39837,"2011":77317,"2013":24040,"2005":35090,"2006":21080,"2007":21351,"2009":101933,"2008":133562,"2002":987,"2004":120,"2003":253}},{"snapshot_date":"2013-09","composition":{"2012":181910,"2013":44136,"2010":37648,"2011":75123,"2006":14587,"2009":97168,"2005":33008,"2007":20078,"2008":127205,"2002":711,"2004":78,"2003":252}},{"snapshot_date":"2013-12","composition":{"2013":46300,"2012":180411,"2010":37466,"2011":74810,"2006":14451,"2009":96747,"2008":126806,"2005":32818,"2007":19912,"2002":709,"2004":77,"2003":252}},{"snapshot_date":"2014-03","composition":{"2010":36748,"2013":43606,"2012":178530,"2011":73040,"2014":18240,"2006":13029,"2009":94898,"2005":30273,"2007":19769,"2008":45858,"2002":696,"2004":77,"2003":252}},{"snapshot_date":"2014-06","composition":{"2010":36188,"2012":171754,"2014":13714,"2013":41708,"2011":70672,"2005":30161,"2006":12022,"2007":18289,"2009":87455,"2008":45113,"2002":695,"2004":77,"2003":252}},{"snapshot_date":"2014-09","composition":{"2012":171112,"2014":22362,"2013":40712,"2010":35603,"2011":69888,"2005":29302,"2006":11875,"2007":18123,"2009":86038,"2008":44758,"2002":635,"2004":60,"2003":252}},{"snapshot_date":"2014-12","composition":{"2010":35489,"2014":25570,"2013":40593,"2012":170931,"2011":69766,"2006":11771,"2009":85664,"2005":29175,"2007":17939,"2008":44138,"2002":634,"2004":60,"2003":252}},{"snapshot_date":"2015-03","composition":{"2013":39957,"2010":35074,"2015":6679,"2012":170636,"2014":26701,"2011":69486,"2008":43679,"2009":84818,"2005":29095,"2006":11512,"2007":17877,"2002":634,"2004":60,"2003":252}},{"snapshot_date":"2015-06","composition":{"2015":12670,"2012":170329,"2010":34951,"2014":27726,"2013":39699,"2011":68961,"2005":29020,"2006":11386,"2007":17747,"2008":43249,"2009":84286,"2002":631,"2004":60,"2003":252}},{"snapshot_date":"2015-09","composition":{"2015":29669,"2010":34469,"2014":27200,"2012":169803,"2013":37171,"2011":67345,"2006":10984,"2009":83646,"2005":26685,"2007":16829,"2008":41734,"2002":630,"2004":56,"2003":234}},{"snapshot_date":"2015-12","composition":{"2010":34274,"2014":26998,"2013":37010,"2015":35389,"2012":169642,"2011":66650,"2008":41411,"2009":83086,"2006":10946,"2005":26566,"2007":16737,"2002":629,"2004":56,"2003":234,"2016":1}},{"snapshot_date":"2016-03","composition":{"2012":169383,"2015":36532,"2016":7776,"2010":34125,"2014":26521,"2013":36586,"2006":10683,"2008":41062,"2009":82603,"2011":65855,"2005":26355,"2007":16480,"2002":629,"2004":56,"2003":234}},{"snapshot_date":"2016-06","composition":{"2013":36522,"2010":34002,"2015":36608,"2012":169365,"2014":26371,"2011":65773,"2016":10759,"2008":40860,"2009":82403,"2006":10650,"2005":26319,"2007":16317,"2002":623,"2004":56,"2003":234}},{"snapshot_date":"2016-09","composition":{"2012":169253,"2015":36237,"2016":15735,"2010":33913,"2014":26125,"2013":36265,"2008":40740,"2009":82284,"2011":65666,"2006":10609,"2005":26245,"2007":16285,"2002":623,"2004":56,"2003":234}},{"snapshot_date":"2016-12","composition":{"2015":36071,"2012":169155,"2016":21595,"2014":25757,"2013":35985,"2010":33535,"2011":65332,"2008":40607,"2009":82163,"2005":26216,"2007":16223,"2006":10541,"2002":622,"2004":56,"2003":233}},{"snapshot_date":"2017-03","composition":{"2014":25576,"2017":110922,"2015":35648,"2013":35176,"2012":62863,"2016":39453,"2010":33095,"2011":65019,"2008":39631,"2009":68413,"2006":10382,"2005":25919,"2007":16089,"2002":570,"2004":51,"2003":233}},{"snapshot_date":"2017-06","composition":{"2014":24843,"2017":120751,"2015":35392,"2012":62775,"2016":38979,"2013":33867,"2010":32885,"2011":64813,"2008":39305,"2009":66773,"2006":10225,"2005":17793,"2007":15934,"2002":563,"2004":49,"2003":233}},{"snapshot_date":"2017-09","composition":{"2014":24395,"2017":133110,"2012":62537,"2015":34827,"2016":38009,"2013":33024,"2010":32136,"2011":63614,"2006":10061,"2008":38254,"2009":65072,"2005":17186,"2007":15636,"2002":559,"2004":49,"2003":233}},{"snapshot_date":"2017-12","composition":{"2017":149940,"2015":34262,"2016":37400,"2012":62341,"2013":32565,"2014":24265,"2010":31890,"2011":63048,"2008":37707,"2009":64466,"2006":9933,"2005":17051,"2007":15468,"2002":559,"2004":49,"2003":233}},{"snapshot_date":"2018-03","composition":{"2014":22677,"2017":150500,"2015":33944,"2016":36999,"2013":32357,"2012":61939,"2018":5510,"2010":31760,"2011":62549,"2006":9853,"2008":37535,"2009":64198,"2005":16919,"2007":15411,"2002":559,"2004":49,"2003":233}},{"snapshot_date":"2018-06","composition":{"2018":19708,"2013":31673,"2017":150670,"2015":33690,"2016":36365,"2010":31401,"2012":61569,"2014":22274,"2011":61256,"2008":36911,"2009":63381,"2006":9774,"2005":16840,"2007":15269,"2002":550,"2004":49,"2003":232}},{"snapshot_date":"2018-09","composition":{"2017":150375,"2015":33569,"2018":30527,"2016":36174,"2014":21992,"2013":31339,"2010":31239,"2012":61426,"2011":60402,"2008":36478,"2009":62752,"2005":16558,"2007":15145,"2006":9397,"2002":543,"2004":49,"2003":232}},{"snapshot_date":"2018-12","composition":{"2018":48265,"2017":149585,"2015":32778,"2016":35741,"2014":21809,"2010":31055,"2012":61190,"2013":31020,"2011":59340,"2005":14910,"2009":60872,"2006":9063,"2008":35767,"2007":15047,"2002":539,"2004":49,"2003":232}},{"snapshot_date":"2019-03","composition":{"2018":49111,"2019":9272,"2015":32514,"2017":149531,"2016":35564,"2013":30072,"2014":21280,"2012":60851,"2008":35532,"2009":60367,"2010":30642,"2011":58341,"2006":9016,"2005":14835,"2007":15001,"2002":539,"2004":49,"2003":232}},{"snapshot_date":"2019-06","composition":{"2017":149695,"2015":32943,"2018":41877,"2016":36017,"2014":21938,"2013":31076,"2010":31147,"2012":61227,"2011":59492,"2008":36237,"2009":61872,"2006":9065,"2005":16430,"2007":15084,"2002":539,"2004":49,"2019":64,"2003":232}},{"snapshot_date":"2019-09","composition":{"2018":66238,"2019":47674,"2017":149155,"2015":31132,"2016":34764,"2014":20457,"2013":29628,"2010":30023,"2012":60316,"2011":57507,"2008":33249,"2009":57563,"2006":8623,"2005":12783,"2007":14521,"2002":538,"2004":49,"2003":231}},{"snapshot_date":"2019-12","composition":{"2018":65394,"2019":56820,"2014":19795,"2017":148850,"2015":30831,"2010":29927,"2016":34561,"2012":60140,"2013":29363,"2011":57286,"2008":32874,"2009":57304,"2006":8557,"2005":12717,"2007":14497,"2002":537,"2004":49,"2003":231}},{"snapshot_date":"2020-03","composition":{"2018":64016,"2019":57180,"2020":24182,"2015":30240,"2014":19448,"2017":147161,"2013":28115,"2012":59428,"2016":33939,"2010":27781,"2011":55546,"2008":32306,"2009":55012,"2006":8293,"2005":12409,"2007":14306,"2002":531,"2004":49,"2003":230}},{"snapshot_date":"2020-06","composition":{"2018":63130,"2019":56788,"2020":48902,"2015":30033,"2014":19034,"2017":147034,"2010":27613,"2016":33750,"2012":59098,"2013":27979,"2011":54685,"2008":32160,"2009":54186,"2006":8235,"2005":12251,"2007":14291,"2002":531,"2004":49,"2003":230}},{"snapshot_date":"2020-09","composition":{"2018":62241,"2019":55422,"2020":77584,"2014":18764,"2017":145552,"2015":29338,"2010":19266,"2016":33581,"2012":58501,"2013":27478,"2011":51167,"2005":12176,"2009":47014,"2008":30427,"2006":8067,"2007":14242,"2002":527,"2004":49,"2003":230}},{"snapshot_date":"2020-12","composition":{"2018":61019,"2020":114533,"2019":54317,"2014":18594,"2017":145129,"2015":28817,"2012":58445,"2016":33480,"2013":26876,"2010":19070,"2011":50577,"2008":30008,"2009":46735,"2006":8047,"2005":12142,"2007":14182,"2002":526,"2004":49,"2003":230}},{"snapshot_date":"2021-03","composition":{"2018":61349,"2020":104740,"2019":54845,"2014":18621,"2017":145434,"2015":29101,"2010":19104,"2016":33526,"2012":58457,"2013":27231,"2011":50614,"2008":30056,"2009":46855,"2006":8051,"2005":12145,"2007":14189,"2021":2222,"2002":527,"2004":49,"2003":230}},{"snapshot_date":"2021-06","composition":{"2020":112039,"2021":45060,"2015":28456,"2019":53018,"2018":59930,"2010":18935,"2016":33218,"2012":58314,"2017":144293,"2014":18363,"2013":26466,"2011":47525,"2006":8013,"2008":30260,"2009":46200,"2005":12104,"2007":14141,"2002":524,"2004":49,"2003":230}},{"snapshot_date":"2021-09","composition":{"2021":97407,"2018":59052,"2020":109278,"2019":52650,"2015":27816,"2012":58200,"2016":33061,"2017":143713,"2013":25955,"2010":18656,"2014":17505,"2011":46683,"2008":29288,"2009":45809,"2006":7931,"2005":11659,"2007":13859,"2002":519,"2004":48,"2003":230}},{"snapshot_date":"2021-12","composition":{"2018":58489,"2021":121839,"2020":106659,"2019":52144,"2015":27360,"2010":17939,"2016":32977,"2012":57388,"2017":143312,"2014":16983,"2013":25758,"2011":46229,"2008":29088,"2009":45709,"2006":7907,"2005":11544,"2007":13832,"2022":617,"2002":519,"2004":48,"2003":230}},{"snapshot_date":"2022-03","composition":{"2018":58156,"2019":51279,"2021":125507,"2020":105377,"2015":26951,"2022":17766,"2010":17808,"2016":32844,"2012":56311,"2017":143069,"2014":16635,"2013":25031,"2011":46017,"2006":7895,"2008":28815,"2009":45556,"2005":11537,"2007":13822,"2002":519,"2004":48,"2003":230}},{"snapshot_date":"2022-06","composition":{"2018":57807,"2021":126201,"2019":50945,"2015":26535,"2020":104275,"2022":37666,"2010":17599,"2016":32719,"2012":55823,"2017":142586,"2014":16534,"2013":24376,"2011":45697,"2006":7797,"2008":28642,"2009":44994,"2005":11438,"2007":13733,"2002":516,"2004":48,"2003":229}},{"snapshot_date":"2022-09","composition":{"2015":26468,"2021":125733,"2019":50800,"2020":104008,"2018":57726,"2022":42875,"2010":17534,"2016":32655,"2012":55811,"2017":142510,"2014":16422,"2013":24318,"2011":45687,"2008":28526,"2009":44517,"2006":7794,"2005":11438,"2007":13668,"2002":516,"2004":48,"2003":229}},{"snapshot_date":"2022-12","composition":{"2018":57356,"2021":124792,"2020":103301,"2022":55956,"2019":50605,"2015":25906,"2010":17490,"2016":32562,"2012":55746,"2017":142114,"2014":16365,"2013":23784,"2011":45494,"2008":28450,"2009":44247,"2006":7761,"2005":11405,"2007":13577,"2003":229,"2004":48,"2002":516}},{"snapshot_date":"2023-03","composition":{"2022":59169,"2023":10359,"2021":120525,"2018":56770,"2019":49742,"2020":100987,"2015":25710,"2012":55648,"2016":32315,"2013":23482,"2017":140354,"2010":17341,"2014":16282,"2011":45145,"2008":28012,"2009":44088,"2006":7683,"2005":11389,"2007":13558,"2002":516,"2004":48,"2003":228}},{"snapshot_date":"2023-06","composition":{"2022":58438,"2023":34208,"2018":56373,"2021":107985,"2019":49163,"2020":100184,"2015":25495,"2012":55502,"2016":32250,"2017":139945,"2010":16797,"2014":16125,"2013":23318,"2011":44806,"2006":7209,"2008":27784,"2009":43728,"2005":11334,"2007":13493,"2002":516,"2004":48,"2003":227}},{"snapshot_date":"2023-09","composition":{"2022":55701,"2023":62492,"2018":55435,"2021":100402,"2019":47753,"2020":94592,"2015":24632,"2012":55265,"2016":30433,"2017":138546,"2010":16400,"2014":15672,"2013":22552,"2011":44120,"2008":26921,"2009":41512,"2006":6928,"2005":11158,"2007":13056,"2004":48,"2003":220,"2002":509}},{"snapshot_date":"2023-12","composition":{"2018":53945,"2021":98880,"2022":54600,"2023":91904,"2019":47067,"2020":93527,"2015":24156,"2012":54822,"2016":30177,"2017":137828,"2010":15736,"2014":15179,"2013":21937,"2011":43821,"2008":26233,"2009":40323,"2006":6464,"2005":10910,"2007":12624,"2004":31,"2003":217,"2002":486}},{"snapshot_date":"2024-03","composition":{"2018":50761,"2021":93663,"2022":52840,"2023":83911,"2024":30118,"2020":92796,"2019":46690,"2015":24044,"2012":54601,"2016":30014,"2017":137644,"2010":15661,"2014":15119,"2013":21678,"2011":43446,"2008":26095,"2009":39081,"2006":6389,"2005":10883,"2007":12600,"2004":31,"2003":217,"2002":485}},{"snapshot_date":"2024-06","composition":{"2018":50598,"2022":52335,"2023":82879,"2021":93119,"2019":46371,"2024":39429,"2020":92289,"2015":23986,"2012":54472,"2016":29981,"2017":137433,"2010":15623,"2014":15091,"2013":21640,"2011":43348,"2006":6379,"2008":26022,"2005":10869,"2009":39036,"2007":12593,"2004":31,"2003":217,"2002":485}},{"snapshot_date":"2024-09","composition":{"2022":51246,"2023":81060,"2018":49868,"2021":91300,"2019":45868,"2024":58495,"2020":91665,"2015":23851,"2012":54407,"2016":29872,"2017":137201,"2008":25874,"2009":38030,"2014":14959,"2010":15329,"2013":21558,"2011":42777,"2006":6362,"2005":10829,"2007":12568,"2004":31,"2003":217,"2002":485}},{"snapshot_date":"2024-12","composition":{"2022":50652,"2023":79580,"2021":89442,"2018":49399,"2024":79718,"2020":90816,"2019":45459,"2015":23478,"2010":15070,"2016":29631,"2012":54260,"2017":136825,"2014":14558,"2013":20957,"2011":40920,"2008":25636,"2009":37605,"2006":6288,"2005":10787,"2007":12206,"2004":23,"2003":211,"2002":469}},{"snapshot_date":"2025-01","composition":{"2022":50616,"2023":79451,"2018":49378,"2019":45451,"2021":89244,"2024":80020,"2020":90760,"2025":2383,"2015":23478,"2010":15065,"2016":29621,"2012":54234,"2017":136711,"2014":14557,"2013":20932,"2011":40903,"2008":25635,"2009":37562,"2006":6288,"2005":10787,"2007":12206,"2004":23,"2003":211,"2002":469}},{"snapshot_date":"2025-02","composition":{"2021":89104,"2025":6210,"2022":50497,"2023":79170,"2018":49255,"2024":79734,"2020":90255,"2019":45295,"2015":23474,"2010":15058,"2016":29571,"2012":54148,"2017":136630,"2014":14553,"2013":20915,"2011":40815,"2006":6278,"2008":25633,"2009":37544,"2005":10787,"2007":12204,"2004":23,"2003":211,"2002":469}},{"snapshot_date":"2025-03","composition":{"2023":78817,"2018":49216,"2024":78977,"2022":49805,"2020":90085,"2021":88042,"2025":12625,"2019":45287,"2015":23424,"2010":15051,"2016":29570,"2012":54148,"2017":136627,"2014":14545,"2013":20864,"2011":40815,"2008":25623,"2009":37543,"2006":6278,"2005":10787,"2007":11959,"2004":23,"2003":211,"2002":469}},{"snapshot_date":"2025-04","composition":{"2018":49092,"2021":86817,"2022":49392,"2023":77780,"2024":77274,"2020":89805,"2025":19503,"2019":45201,"2015":23102,"2010":15012,"2016":29497,"2012":54125,"2017":136524,"2014":14525,"2013":20769,"2011":40682,"2008":25591,"2009":37512,"2006":6270,"2005":10762,"2007":11951,"2004":23,"2003":211,"2002":464}},{"snapshot_date":"2025-05","composition":{"2022":49090,"2023":77083,"2021":85761,"2025":27815,"2018":48737,"2024":76096,"2020":89333,"2019":45009,"2015":22923,"2010":14979,"2016":29405,"2012":54096,"2017":136356,"2014":14458,"2013":20628,"2011":40637,"2008":25546,"2009":37291,"2006":6254,"2005":10703,"2007":11866,"2004":22,"2003":211,"2002":450}},{"snapshot_date":"2025-06","composition":{"2022":49034,"2023":76925,"2018":48697,"2024":75818,"2020":89213,"2021":85570,"2025":32977,"2019":44978,"2015":22919,"2010":14972,"2016":29396,"2012":54070,"2017":136354,"2014":14454,"2013":20607,"2011":40621,"2005":10703,"2008":25546,"2007":11865,"2006":6254,"2009":37289,"2004":22,"2003":211,"2002":450}},{"snapshot_date":"2025-07","composition":{"2022":48965,"2023":76778,"2021":85459,"2025":35930,"2018":48554,"2024":75473,"2020":89067,"2019":44888,"2015":22890,"2010":14950,"2016":29238,"2012":54062,"2017":136300,"2014":14447,"2013":20592,"2011":40589,"2008":25513,"2009":37219,"2006":6254,"2005":10699,"2007":11858,"2004":22,"2003":211,"2002":450}},{"snapshot_date":"2025-08","composition":{"2021":84949,"2025":39378,"2022":48746,"2023":76487,"2018":48540,"2019":44846,"2024":75168,"2020":88972,"2015":22809,"2010":14906,"2016":29174,"2012":54022,"2017":136261,"2014":14425,"2013":20457,"2011":40518,"2006":6254,"2008":25462,"2009":36955,"2005":10699,"2007":11844,"2004":22,"2003":211,"2002":450}},{"snapshot_date":"2025-09","composition":{"2022":48341,"2023":76041,"2021":81877,"2025":44888,"2018":48164,"2019":44345,"2020":88807,"2024":74576,"2015":22727,"2012":53973,"2016":28894,"2017":136220,"2010":14874,"2014":14395,"2013":20405,"2011":40432,"2008":25414,"2009":36912,"2005":10693,"2007":11841,"2006":6223,"2004":22,"2003":211,"2002":450}},{"snapshot_date":"2025-10","composition":{"2022":48193,"2023":75293,"2018":47992,"2019":44311,"2021":80322,"2025":53114,"2020":88604,"2024":72867,"2015":22561,"2010":14854,"2016":28862,"2012":53957,"2017":136093,"2014":14375,"2013":20347,"2011":40375,"2008":25189,"2009":36813,"2006":6195,"2005":10677,"2007":11833,"2004":17,"2002":442,"2003":46}},{"snapshot_date":"2025-11","composition":{"2022":47944,"2023":74782,"2021":79606,"2025":59992,"2018":47831,"2020":88251,"2024":72354,"2019":44191,"2015":22537,"2010":14843,"2016":28752,"2012":53950,"2017":135910,"2014":14359,"2013":20274,"2011":40175,"2006":6059,"2008":25102,"2009":36633,"2005":10673,"2007":11780,"2004":17,"2002":439,"2003":46}},{"snapshot_date":"2025-12","composition":{"2022":47372,"2023":72787,"2018":47273,"2025":66882,"2020":83382,"2021":77625,"2024":70724,"2019":42674,"2015":21873,"2010":14549,"2016":28263,"2012":53465,"2017":134788,"2014":13988,"2013":19504,"2011":39948,"2008":24669,"2009":34785,"2006":4411,"2005":6078,"2007":9963,"2004":17,"2002":439,"2003":46,"2026":4}},{"snapshot_date":"2026-01","composition":{"2022":47319,"2023":72372,"2021":76903,"2025":66534,"2018":47222,"2019":42563,"2020":83215,"2024":70327,"2015":21829,"2026":5958,"2010":14537,"2016":28262,"2012":53457,"2017":134734,"2014":13972,"2013":19463,"2011":39886,"2006":4399,"2005":6067,"2008":24649,"2007":9953,"2009":34693,"2004":17,"2002":439,"2003":46}},{"snapshot_date":"2026-02","composition":{"2021":76792,"2025":66399,"2022":47279,"2023":72221,"2018":46641,"2020":82838,"2024":69905,"2019":42547,"2015":21821,"2026":7958,"2010":14529,"2016":28172,"2012":53453,"2017":134717,"2014":13935,"2013":19456,"2011":39882,"2008":24622,"2009":34684,"2006":4398,"2005":6064,"2007":9946,"2004":17,"2002":439,"2003":46}},{"snapshot_date":"2026-03","composition":{"2021":76470,"2025":66092,"2022":47222,"2023":72001,"2018":46580,"2020":82576,"2024":69743,"2019":42499,"2015":21647,"2026":12114,"2010":14494,"2016":28145,"2012":53430,"2017":134676,"2014":13906,"2013":19444,"2011":39765,"2008":24608,"2009":34566,"2006":4397,"2005":6057,"2007":9938,"2004":17,"2002":439,"2003":46}},{"snapshot_date":"2026-04","composition":{"2022":46950,"2023":71633,"2021":76068,"2025":65188,"2018":46533,"2020":82479,"2024":69500,"2019":42465,"2015":21593,"2026":22321,"2010":14488,"2016":28127,"2012":53406,"2017":134598,"2014":13899,"2013":19408,"2011":39360,"2008":24570,"2009":34442,"2006":4392,"2005":6057,"2007":9933,"2004":17,"2002":439,"2003":46}},{"snapshot_date":"2026-05","composition":{"2021":75838,"2025":64924,"2018":46067,"2023":71355,"2022":44333,"2020":82388,"2024":69295,"2019":40932,"2015":21574,"2026":30358,"2010":14393,"2016":28123,"2012":53303,"2017":134591,"2014":13881,"2013":19370,"2011":39224,"2008":24569,"2009":34416,"2006":4384,"2005":6057,"2007":9932,"2004":17,"2002":439,"2003":46}}],"fossils":{"genesis":{"timestamp":1009260221,"file":"scipy_test/setup_scipy_test.py","content":"#!/usr/bin/env python","year":"2001","commit":"74a4958","view_commit":"74a4958b94e07b2e3218741fb52d32e87308d62e","line":1},"survivor":{"timestamp":1017446578,"file":"numpy/lib/_polynomial_impl.py","content":"def poly(seq_of_zeros):","year":"2002","commit":"0562713","view_commit":"main","line":40}}} \ No newline at end of file +{"snapshots":[{"snapshot_date":"2001-12","composition":{"2001":1865}},{"snapshot_date":"2002-03","composition":{"2002":94339,"2001":1472}},{"snapshot_date":"2002-06","composition":{"2002":102869,"2001":1179}},{"snapshot_date":"2002-09","composition":{"2002":130360,"2001":1167}},{"snapshot_date":"2002-12","composition":{"2002":132966,"2001":1130}},{"snapshot_date":"2003-03","composition":{"2002":132607,"2003":2305,"2001":1052}},{"snapshot_date":"2003-06","composition":{"2003":2688,"2002":132569,"2001":1047}},{"snapshot_date":"2003-09","composition":{"2003":3793,"2002":132461,"2001":1036}},{"snapshot_date":"2003-12","composition":{"2002":131017,"2003":5328,"2001":1009}},{"snapshot_date":"2004-03","composition":{"2002":129547,"2003":3964,"2004":5960,"2001":449}},{"snapshot_date":"2004-06","composition":{"2004":9689,"2002":129500,"2003":3916,"2001":443}},{"snapshot_date":"2004-09","composition":{"2004":10562,"2002":128923,"2003":3868,"2001":443}},{"snapshot_date":"2004-12","composition":{"2002":128551,"2003":3680,"2004":13272,"2001":437}},{"snapshot_date":"2005-03","composition":{"2002":128546,"2003":3676,"2004":13125,"2005":352,"2001":437}},{"snapshot_date":"2005-06","composition":{"2004":13106,"2005":1801,"2002":128527,"2003":3655,"2001":437}},{"snapshot_date":"2005-09","composition":{"2005":150609,"2002":120948,"2004":2743,"2003":2178}},{"snapshot_date":"2005-12","composition":{"2005":192483,"2002":95435,"2004":2503,"2003":1868}},{"snapshot_date":"2006-03","composition":{"2006":24364,"2005":147165,"2002":1852,"2004":152,"2003":357}},{"snapshot_date":"2006-06","composition":{"2006":42885,"2005":144356,"2002":1837,"2004":148,"2003":355}},{"snapshot_date":"2006-09","composition":{"2005":135193,"2006":106620,"2002":1809,"2004":145,"2003":339}},{"snapshot_date":"2006-12","composition":{"2006":82177,"2005":134843,"2002":1803,"2004":144,"2003":339}},{"snapshot_date":"2007-03","composition":{"2005":134590,"2006":76164,"2007":13620,"2002":1749,"2004":144,"2003":333}},{"snapshot_date":"2007-06","composition":{"2005":133132,"2006":74427,"2007":21742,"2002":1747,"2004":144,"2003":333}},{"snapshot_date":"2007-09","composition":{"2006":61136,"2005":124879,"2007":49906,"2002":1742,"2004":144,"2003":333}},{"snapshot_date":"2007-12","composition":{"2006":60016,"2007":58304,"2005":123821,"2002":1739,"2004":144,"2003":332}},{"snapshot_date":"2008-03","composition":{"2005":122014,"2007":61831,"2006":58723,"2008":11648,"2002":1736,"2004":144,"2003":328}},{"snapshot_date":"2008-06","composition":{"2005":120599,"2006":52922,"2008":35857,"2007":54121,"2002":1602,"2004":141,"2003":316}},{"snapshot_date":"2008-09","composition":{"2005":119750,"2006":33511,"2008":136007,"2007":46987,"2002":1353,"2004":138,"2003":281}},{"snapshot_date":"2008-12","composition":{"2005":118888,"2006":33032,"2007":45694,"2008":162098,"2002":1352,"2004":138,"2003":281}},{"snapshot_date":"2009-03","composition":{"2005":118010,"2006":31300,"2007":40897,"2008":155221,"2009":45419,"2002":1351,"2004":138,"2003":278}},{"snapshot_date":"2009-06","composition":{"2005":116213,"2006":26011,"2008":148517,"2009":87758,"2007":30084,"2002":1346,"2004":138,"2003":278}},{"snapshot_date":"2009-09","composition":{"2005":116065,"2006":25778,"2007":29821,"2008":146944,"2009":100827,"2002":1158,"2004":136,"2003":278}},{"snapshot_date":"2009-12","composition":{"2008":143661,"2009":145005,"2006":25011,"2005":113855,"2007":28736,"2002":1158,"2004":136,"2003":273}},{"snapshot_date":"2010-03","composition":{"2009":136932,"2008":142180,"2005":111850,"2006":24474,"2007":28449,"2010":17329,"2002":1064,"2004":123,"2003":259}},{"snapshot_date":"2010-06","composition":{"2005":111719,"2006":23646,"2007":28023,"2008":141950,"2009":136184,"2010":23093,"2002":1057,"2004":123,"2003":259}},{"snapshot_date":"2010-09","composition":{"2010":28562,"2009":135219,"2006":23589,"2005":111669,"2008":141483,"2007":25037,"2002":1055,"2004":123,"2003":259}},{"snapshot_date":"2010-12","composition":{"2010":43152,"2009":125155,"2006":23469,"2005":111642,"2008":140070,"2007":24957,"2002":1046,"2004":123,"2003":259}},{"snapshot_date":"2011-03","composition":{"2010":49445,"2005":111319,"2006":23132,"2007":23654,"2011":38997,"2009":119000,"2008":138780,"2002":1039,"2004":123,"2003":257}},{"snapshot_date":"2011-06","composition":{"2010":48692,"2011":58195,"2008":138338,"2009":115195,"2006":22996,"2005":111133,"2007":23524,"2002":995,"2004":122,"2003":257}},{"snapshot_date":"2011-09","composition":{"2011":88031,"2010":46487,"2008":137097,"2009":112421,"2006":22699,"2005":110990,"2007":22880,"2002":995,"2004":121,"2003":257}},{"snapshot_date":"2011-12","composition":{"2010":46148,"2011":92965,"2005":110983,"2006":22541,"2007":22841,"2008":137041,"2009":110759,"2002":995,"2004":121,"2003":257}},{"snapshot_date":"2012-03","composition":{"2010":42510,"2011":95632,"2006":22193,"2005":110401,"2007":22505,"2009":108210,"2008":136391,"2012":12933,"2002":995,"2004":121,"2003":257}},{"snapshot_date":"2012-06","composition":{"2010":41767,"2012":20487,"2011":82718,"2006":21976,"2009":107301,"2005":110335,"2007":22419,"2008":136224,"2002":990,"2004":121,"2003":255}},{"snapshot_date":"2012-09","composition":{"2010":41149,"2012":30421,"2011":79589,"2005":110282,"2006":21736,"2007":22074,"2009":105328,"2008":135866,"2002":990,"2004":121,"2003":255}},{"snapshot_date":"2012-12","composition":{"2010":40909,"2012":32983,"2011":79086,"2005":110248,"2006":21734,"2007":22062,"2008":135824,"2009":104923,"2002":990,"2004":121,"2003":255}},{"snapshot_date":"2013-03","composition":{"2012":29693,"2010":40794,"2011":78031,"2013":7110,"2005":109997,"2006":21580,"2007":21690,"2008":134251,"2009":103977,"2002":989,"2004":121,"2003":254}},{"snapshot_date":"2013-06","composition":{"2012":183454,"2010":39837,"2011":77317,"2013":24040,"2005":35090,"2006":21080,"2007":21351,"2009":101933,"2008":133562,"2002":987,"2004":120,"2003":253}},{"snapshot_date":"2013-09","composition":{"2012":181910,"2013":44136,"2010":37648,"2011":75123,"2006":14587,"2009":97168,"2005":33008,"2007":20078,"2008":127205,"2002":711,"2004":78,"2003":252}},{"snapshot_date":"2013-12","composition":{"2013":46300,"2012":180411,"2010":37466,"2011":74810,"2006":14451,"2009":96747,"2008":126806,"2005":32818,"2007":19912,"2002":709,"2004":77,"2003":252}},{"snapshot_date":"2014-03","composition":{"2010":36748,"2013":43606,"2012":178530,"2011":73040,"2014":18240,"2006":13029,"2009":94898,"2005":30273,"2007":19769,"2008":45858,"2002":696,"2004":77,"2003":252}},{"snapshot_date":"2014-06","composition":{"2010":36188,"2012":171754,"2014":13714,"2013":41708,"2011":70672,"2005":30161,"2006":12022,"2007":18289,"2009":87455,"2008":45113,"2002":695,"2004":77,"2003":252}},{"snapshot_date":"2014-09","composition":{"2012":171112,"2014":22362,"2013":40712,"2010":35603,"2011":69888,"2005":29302,"2006":11875,"2007":18123,"2009":86038,"2008":44758,"2002":635,"2004":60,"2003":252}},{"snapshot_date":"2014-12","composition":{"2010":35489,"2014":25570,"2013":40593,"2012":170931,"2011":69766,"2006":11771,"2009":85664,"2005":29175,"2007":17939,"2008":44138,"2002":634,"2004":60,"2003":252}},{"snapshot_date":"2015-03","composition":{"2013":39957,"2010":35074,"2015":6679,"2012":170636,"2014":26701,"2011":69486,"2008":43679,"2009":84818,"2005":29095,"2006":11512,"2007":17877,"2002":634,"2004":60,"2003":252}},{"snapshot_date":"2015-06","composition":{"2015":12670,"2012":170329,"2010":34951,"2014":27726,"2013":39699,"2011":68961,"2005":29020,"2006":11386,"2007":17747,"2008":43249,"2009":84286,"2002":631,"2004":60,"2003":252}},{"snapshot_date":"2015-09","composition":{"2015":29669,"2010":34469,"2014":27200,"2012":169803,"2013":37171,"2011":67345,"2006":10984,"2009":83646,"2005":26685,"2007":16829,"2008":41734,"2002":630,"2004":56,"2003":234}},{"snapshot_date":"2015-12","composition":{"2010":34274,"2014":26998,"2013":37010,"2015":35389,"2012":169642,"2011":66650,"2008":41411,"2009":83086,"2006":10946,"2005":26566,"2007":16737,"2002":629,"2004":56,"2003":234}},{"snapshot_date":"2016-03","composition":{"2012":169383,"2015":36532,"2016":7776,"2010":34125,"2014":26521,"2013":36586,"2006":10683,"2008":41062,"2009":82603,"2011":65855,"2005":26355,"2007":16480,"2002":629,"2004":56,"2003":234}},{"snapshot_date":"2016-06","composition":{"2013":36522,"2010":34002,"2015":36608,"2012":169365,"2014":26371,"2011":65773,"2016":10759,"2008":40860,"2009":82403,"2006":10650,"2005":26319,"2007":16317,"2002":623,"2004":56,"2003":234}},{"snapshot_date":"2016-09","composition":{"2012":169253,"2015":36237,"2016":15735,"2010":33913,"2014":26125,"2013":36265,"2008":40740,"2009":82284,"2011":65666,"2006":10609,"2005":26245,"2007":16285,"2002":623,"2004":56,"2003":234}},{"snapshot_date":"2016-12","composition":{"2015":36071,"2012":169155,"2016":21595,"2014":25757,"2013":35985,"2010":33535,"2011":65332,"2008":40607,"2009":82163,"2005":26216,"2007":16223,"2006":10541,"2002":622,"2004":56,"2003":233}},{"snapshot_date":"2017-03","composition":{"2014":25576,"2017":110922,"2015":35648,"2013":35176,"2012":62863,"2016":39453,"2010":33095,"2011":65019,"2008":39631,"2009":68413,"2006":10382,"2005":25919,"2007":16089,"2002":570,"2004":51,"2003":233}},{"snapshot_date":"2017-06","composition":{"2014":24843,"2017":120751,"2015":35392,"2012":62775,"2016":38979,"2013":33867,"2010":32885,"2011":64813,"2008":39305,"2009":66773,"2006":10225,"2005":17793,"2007":15934,"2002":563,"2004":49,"2003":233}},{"snapshot_date":"2017-09","composition":{"2014":24395,"2017":133110,"2012":62537,"2015":34827,"2016":38009,"2013":33024,"2010":32136,"2011":63614,"2006":10061,"2008":38254,"2009":65072,"2005":17186,"2007":15636,"2002":559,"2004":49,"2003":233}},{"snapshot_date":"2017-12","composition":{"2017":149940,"2015":34262,"2016":37400,"2012":62341,"2013":32565,"2014":24265,"2010":31890,"2011":63048,"2008":37707,"2009":64466,"2006":9933,"2005":17051,"2007":15468,"2002":559,"2004":49,"2003":233}},{"snapshot_date":"2018-03","composition":{"2014":22677,"2017":150500,"2015":33944,"2016":36999,"2013":32357,"2012":61939,"2018":5510,"2010":31760,"2011":62549,"2006":9853,"2008":37535,"2009":64198,"2005":16919,"2007":15411,"2002":559,"2004":49,"2003":233}},{"snapshot_date":"2018-06","composition":{"2018":19708,"2013":31673,"2017":150670,"2015":33690,"2016":36365,"2010":31401,"2012":61569,"2014":22274,"2011":61256,"2008":36911,"2009":63381,"2006":9774,"2005":16840,"2007":15269,"2002":550,"2004":49,"2003":232}},{"snapshot_date":"2018-09","composition":{"2017":150375,"2015":33569,"2018":30527,"2016":36174,"2014":21992,"2013":31339,"2010":31239,"2012":61426,"2011":60402,"2008":36478,"2009":62752,"2005":16558,"2007":15145,"2006":9397,"2002":543,"2004":49,"2003":232}},{"snapshot_date":"2018-12","composition":{"2018":48265,"2017":149585,"2015":32778,"2016":35741,"2014":21809,"2010":31055,"2012":61190,"2013":31020,"2011":59340,"2005":14910,"2009":60872,"2006":9063,"2008":35767,"2007":15047,"2002":539,"2004":49,"2003":232}},{"snapshot_date":"2019-03","composition":{"2018":49111,"2019":9272,"2015":32514,"2017":149531,"2016":35564,"2013":30072,"2014":21280,"2012":60851,"2008":35532,"2009":60367,"2010":30642,"2011":58341,"2006":9016,"2005":14835,"2007":15001,"2002":539,"2004":49,"2003":232}},{"snapshot_date":"2019-06","composition":{"2017":149695,"2015":32943,"2018":41877,"2016":36017,"2014":21938,"2013":31076,"2010":31147,"2012":61227,"2011":59492,"2008":36237,"2009":61872,"2006":9065,"2005":16430,"2007":15084,"2002":539,"2004":49,"2019":64,"2003":232}},{"snapshot_date":"2019-09","composition":{"2018":66238,"2019":47674,"2017":149155,"2015":31132,"2016":34764,"2014":20457,"2013":29628,"2010":30023,"2012":60316,"2011":57507,"2008":33249,"2009":57563,"2006":8623,"2005":12783,"2007":14521,"2002":538,"2004":49,"2003":231}},{"snapshot_date":"2019-12","composition":{"2018":65394,"2019":56820,"2014":19795,"2017":148850,"2015":30831,"2010":29927,"2016":34561,"2012":60140,"2013":29363,"2011":57286,"2008":32874,"2009":57304,"2006":8557,"2005":12717,"2007":14497,"2002":537,"2004":49,"2003":231}},{"snapshot_date":"2020-03","composition":{"2018":64016,"2019":57180,"2020":24182,"2015":30240,"2014":19448,"2017":147161,"2013":28115,"2012":59428,"2016":33939,"2010":27781,"2011":55546,"2008":32306,"2009":55012,"2006":8293,"2005":12409,"2007":14306,"2002":531,"2004":49,"2003":230}},{"snapshot_date":"2020-06","composition":{"2018":63130,"2019":56788,"2020":48902,"2015":30033,"2014":19034,"2017":147034,"2010":27613,"2016":33750,"2012":59098,"2013":27979,"2011":54685,"2008":32160,"2009":54186,"2006":8235,"2005":12251,"2007":14291,"2002":531,"2004":49,"2003":230}},{"snapshot_date":"2020-09","composition":{"2018":62241,"2019":55422,"2020":77584,"2014":18764,"2017":145552,"2015":29338,"2010":19266,"2016":33581,"2012":58501,"2013":27478,"2011":51167,"2005":12176,"2009":47014,"2008":30427,"2006":8067,"2007":14242,"2002":527,"2004":49,"2003":230}},{"snapshot_date":"2020-12","composition":{"2018":61019,"2020":114533,"2019":54317,"2014":18594,"2017":145129,"2015":28817,"2012":58445,"2016":33480,"2013":26876,"2010":19070,"2011":50577,"2008":30008,"2009":46735,"2006":8047,"2005":12142,"2007":14182,"2002":526,"2004":49,"2003":230}},{"snapshot_date":"2021-03","composition":{"2018":61349,"2020":104740,"2019":54845,"2014":18621,"2017":145434,"2015":29101,"2010":19104,"2016":33526,"2012":58457,"2013":27231,"2011":50614,"2008":30056,"2009":46855,"2006":8051,"2005":12145,"2007":14189,"2021":2222,"2002":527,"2004":49,"2003":230}},{"snapshot_date":"2021-06","composition":{"2020":112039,"2021":45060,"2015":28456,"2019":53018,"2018":59930,"2010":18935,"2016":33218,"2012":58314,"2017":144293,"2014":18363,"2013":26466,"2011":47525,"2006":8013,"2008":30260,"2009":46200,"2005":12104,"2007":14141,"2002":524,"2004":49,"2003":230}},{"snapshot_date":"2021-09","composition":{"2021":97407,"2018":59052,"2020":109278,"2019":52650,"2015":27816,"2012":58200,"2016":33061,"2017":143713,"2013":25955,"2010":18656,"2014":17505,"2011":46683,"2008":29288,"2009":45809,"2006":7931,"2005":11659,"2007":13859,"2002":519,"2004":48,"2003":230}},{"snapshot_date":"2021-12","composition":{"2018":58489,"2021":121839,"2020":106659,"2019":52144,"2015":27360,"2010":17939,"2016":32977,"2012":57388,"2017":143312,"2014":16983,"2013":25758,"2011":46229,"2008":29088,"2009":45709,"2006":7907,"2005":11544,"2007":13832,"2002":519,"2004":48,"2003":230}},{"snapshot_date":"2022-03","composition":{"2018":58156,"2019":51279,"2021":125507,"2020":105377,"2015":26951,"2022":17766,"2010":17808,"2016":32844,"2012":56311,"2017":143069,"2014":16635,"2013":25031,"2011":46017,"2006":7895,"2008":28815,"2009":45556,"2005":11537,"2007":13822,"2002":519,"2004":48,"2003":230}},{"snapshot_date":"2022-06","composition":{"2018":57807,"2021":126201,"2019":50945,"2015":26535,"2020":104275,"2022":37666,"2010":17599,"2016":32719,"2012":55823,"2017":142586,"2014":16534,"2013":24376,"2011":45697,"2006":7797,"2008":28642,"2009":44994,"2005":11438,"2007":13733,"2002":516,"2004":48,"2003":229}},{"snapshot_date":"2022-09","composition":{"2015":26468,"2021":125733,"2019":50800,"2020":104008,"2018":57726,"2022":42875,"2010":17534,"2016":32655,"2012":55811,"2017":142510,"2014":16422,"2013":24318,"2011":45687,"2008":28526,"2009":44517,"2006":7794,"2005":11438,"2007":13668,"2002":516,"2004":48,"2003":229}},{"snapshot_date":"2022-12","composition":{"2018":57356,"2021":124792,"2020":103301,"2022":55956,"2019":50605,"2015":25906,"2010":17490,"2016":32562,"2012":55746,"2017":142114,"2014":16365,"2013":23784,"2011":45494,"2008":28450,"2009":44247,"2006":7761,"2005":11405,"2007":13577,"2003":229,"2004":48,"2002":516}},{"snapshot_date":"2023-03","composition":{"2022":59169,"2023":10359,"2021":120525,"2018":56770,"2019":49742,"2020":100987,"2015":25710,"2012":55648,"2016":32315,"2013":23482,"2017":140354,"2010":17341,"2014":16282,"2011":45145,"2008":28012,"2009":44088,"2006":7683,"2005":11389,"2007":13558,"2002":516,"2004":48,"2003":228}},{"snapshot_date":"2023-06","composition":{"2022":58438,"2023":34208,"2018":56373,"2021":107985,"2019":49163,"2020":100184,"2015":25495,"2012":55502,"2016":32250,"2017":139945,"2010":16797,"2014":16125,"2013":23318,"2011":44806,"2006":7209,"2008":27784,"2009":43728,"2005":11334,"2007":13493,"2002":516,"2004":48,"2003":227}},{"snapshot_date":"2023-09","composition":{"2022":55701,"2023":62492,"2018":55435,"2021":100402,"2019":47753,"2020":94592,"2015":24632,"2012":55265,"2016":30433,"2017":138546,"2010":16400,"2014":15672,"2013":22552,"2011":44120,"2008":26921,"2009":41512,"2006":6928,"2005":11158,"2007":13056,"2004":48,"2003":220,"2002":509}},{"snapshot_date":"2023-12","composition":{"2018":53945,"2021":98880,"2022":54600,"2023":91904,"2019":47067,"2020":93527,"2015":24156,"2012":54822,"2016":30177,"2017":137828,"2010":15736,"2014":15179,"2013":21937,"2011":43821,"2008":26233,"2009":40323,"2006":6464,"2005":10910,"2007":12624,"2004":31,"2003":217,"2002":486}},{"snapshot_date":"2024-03","composition":{"2018":50761,"2021":93663,"2022":52840,"2023":83911,"2024":30118,"2020":92796,"2019":46690,"2015":24044,"2012":54601,"2016":30014,"2017":137644,"2010":15661,"2014":15119,"2013":21678,"2011":43446,"2008":26095,"2009":39081,"2006":6389,"2005":10883,"2007":12600,"2004":31,"2003":217,"2002":485}},{"snapshot_date":"2024-06","composition":{"2018":50598,"2022":52335,"2023":82879,"2021":93119,"2019":46371,"2024":39429,"2020":92289,"2015":23986,"2012":54472,"2016":29981,"2017":137433,"2010":15623,"2014":15091,"2013":21640,"2011":43348,"2006":6379,"2008":26022,"2005":10869,"2009":39036,"2007":12593,"2004":31,"2003":217,"2002":485}},{"snapshot_date":"2024-09","composition":{"2022":51246,"2023":81060,"2018":49868,"2021":91300,"2019":45868,"2024":58495,"2020":91665,"2015":23851,"2012":54407,"2016":29872,"2017":137201,"2008":25874,"2009":38030,"2014":14959,"2010":15329,"2013":21558,"2011":42777,"2006":6362,"2005":10829,"2007":12568,"2004":31,"2003":217,"2002":485}},{"snapshot_date":"2024-12","composition":{"2022":50652,"2023":79580,"2021":89442,"2018":49399,"2024":79718,"2020":90816,"2019":45459,"2015":23478,"2010":15070,"2016":29631,"2012":54260,"2017":136825,"2014":14558,"2013":20957,"2011":40920,"2008":25636,"2009":37605,"2006":6288,"2005":10787,"2007":12206,"2004":23,"2003":211,"2002":469}},{"snapshot_date":"2025-01","composition":{"2022":50616,"2023":79451,"2018":49378,"2019":45451,"2021":89244,"2024":80020,"2020":90760,"2025":2383,"2015":23478,"2010":15065,"2016":29621,"2012":54234,"2017":136711,"2014":14557,"2013":20932,"2011":40903,"2008":25635,"2009":37562,"2006":6288,"2005":10787,"2007":12206,"2004":23,"2003":211,"2002":469}},{"snapshot_date":"2025-02","composition":{"2021":89104,"2025":6210,"2022":50497,"2023":79170,"2018":49255,"2024":79734,"2020":90255,"2019":45295,"2015":23474,"2010":15058,"2016":29571,"2012":54148,"2017":136630,"2014":14553,"2013":20915,"2011":40815,"2006":6278,"2008":25633,"2009":37544,"2005":10787,"2007":12204,"2004":23,"2003":211,"2002":469}},{"snapshot_date":"2025-03","composition":{"2023":78817,"2018":49216,"2024":78977,"2022":49805,"2020":90085,"2021":88042,"2025":12625,"2019":45287,"2015":23424,"2010":15051,"2016":29570,"2012":54148,"2017":136627,"2014":14545,"2013":20864,"2011":40815,"2008":25623,"2009":37543,"2006":6278,"2005":10787,"2007":11959,"2004":23,"2003":211,"2002":469}},{"snapshot_date":"2025-04","composition":{"2018":49092,"2021":86817,"2022":49392,"2023":77780,"2024":77274,"2020":89805,"2025":19503,"2019":45201,"2015":23102,"2010":15012,"2016":29497,"2012":54125,"2017":136524,"2014":14525,"2013":20769,"2011":40682,"2008":25591,"2009":37512,"2006":6270,"2005":10762,"2007":11951,"2004":23,"2003":211,"2002":464}},{"snapshot_date":"2025-05","composition":{"2022":49090,"2023":77083,"2021":85761,"2025":27815,"2018":48737,"2024":76096,"2020":89333,"2019":45009,"2015":22923,"2010":14979,"2016":29405,"2012":54096,"2017":136356,"2014":14458,"2013":20628,"2011":40637,"2008":25546,"2009":37291,"2006":6254,"2005":10703,"2007":11866,"2004":22,"2003":211,"2002":450}},{"snapshot_date":"2025-06","composition":{"2022":49034,"2023":76925,"2018":48697,"2024":75818,"2020":89213,"2021":85570,"2025":32977,"2019":44978,"2015":22919,"2010":14972,"2016":29396,"2012":54070,"2017":136354,"2014":14454,"2013":20607,"2011":40621,"2005":10703,"2008":25546,"2007":11865,"2006":6254,"2009":37289,"2004":22,"2003":211,"2002":450}},{"snapshot_date":"2025-07","composition":{"2022":48965,"2023":76778,"2021":85459,"2025":35930,"2018":48554,"2024":75473,"2020":89067,"2019":44888,"2015":22890,"2010":14950,"2016":29238,"2012":54062,"2017":136300,"2014":14447,"2013":20592,"2011":40589,"2008":25513,"2009":37219,"2006":6254,"2005":10699,"2007":11858,"2004":22,"2003":211,"2002":450}},{"snapshot_date":"2025-08","composition":{"2021":84949,"2025":39378,"2022":48746,"2023":76487,"2018":48540,"2019":44846,"2024":75168,"2020":88972,"2015":22809,"2010":14906,"2016":29174,"2012":54022,"2017":136261,"2014":14425,"2013":20457,"2011":40518,"2006":6254,"2008":25462,"2009":36955,"2005":10699,"2007":11844,"2004":22,"2003":211,"2002":450}},{"snapshot_date":"2025-09","composition":{"2022":48341,"2023":76041,"2021":81877,"2025":44888,"2018":48164,"2019":44345,"2020":88807,"2024":74576,"2015":22727,"2012":53973,"2016":28894,"2017":136220,"2010":14874,"2014":14395,"2013":20405,"2011":40432,"2008":25414,"2009":36912,"2005":10693,"2007":11841,"2006":6223,"2004":22,"2003":211,"2002":450}},{"snapshot_date":"2025-10","composition":{"2022":48193,"2023":75293,"2018":47992,"2019":44311,"2021":80322,"2025":53114,"2020":88604,"2024":72867,"2015":22561,"2010":14854,"2016":28862,"2012":53957,"2017":136093,"2014":14375,"2013":20347,"2011":40375,"2008":25189,"2009":36813,"2006":6195,"2005":10677,"2007":11833,"2004":17,"2002":442,"2003":46}},{"snapshot_date":"2025-11","composition":{"2022":47944,"2023":74782,"2021":79606,"2025":59992,"2018":47831,"2020":88251,"2024":72354,"2019":44191,"2015":22537,"2010":14843,"2016":28752,"2012":53950,"2017":135910,"2014":14359,"2013":20274,"2011":40175,"2006":6059,"2008":25102,"2009":36633,"2005":10673,"2007":11780,"2004":17,"2002":439,"2003":46}},{"snapshot_date":"2025-12","composition":{"2022":47372,"2023":72787,"2018":47273,"2025":66882,"2020":83382,"2021":77625,"2024":70724,"2019":42674,"2015":21873,"2010":14549,"2016":28263,"2012":53465,"2017":134788,"2014":13988,"2013":19504,"2011":39948,"2008":24669,"2009":34785,"2006":4411,"2005":6078,"2007":9963,"2004":17,"2002":439,"2003":46}},{"snapshot_date":"2026-01","composition":{"2022":47319,"2023":72372,"2021":76903,"2025":66534,"2018":47222,"2019":42563,"2020":83215,"2024":70327,"2015":21829,"2026":5958,"2010":14537,"2016":28262,"2012":53457,"2017":134734,"2014":13972,"2013":19463,"2011":39886,"2006":4399,"2005":6067,"2008":24649,"2007":9953,"2009":34693,"2004":17,"2002":439,"2003":46}},{"snapshot_date":"2026-02","composition":{"2021":76792,"2025":66399,"2022":47279,"2023":72221,"2018":46641,"2020":82838,"2024":69905,"2019":42547,"2015":21821,"2026":7958,"2010":14529,"2016":28172,"2012":53453,"2017":134717,"2014":13935,"2013":19456,"2011":39882,"2008":24622,"2009":34684,"2006":4398,"2005":6064,"2007":9946,"2004":17,"2002":439,"2003":46}},{"snapshot_date":"2026-03","composition":{"2021":76470,"2025":66092,"2022":47222,"2023":72001,"2018":46580,"2020":82576,"2024":69743,"2019":42499,"2015":21647,"2026":12114,"2010":14494,"2016":28145,"2012":53430,"2017":134676,"2014":13906,"2013":19444,"2011":39765,"2008":24608,"2009":34566,"2006":4397,"2005":6057,"2007":9938,"2004":17,"2002":439,"2003":46}},{"snapshot_date":"2026-04","composition":{"2022":46950,"2023":71633,"2021":76068,"2025":65188,"2018":46533,"2020":82479,"2024":69500,"2019":42465,"2015":21593,"2026":22321,"2010":14488,"2016":28127,"2012":53406,"2017":134598,"2014":13899,"2013":19408,"2011":39360,"2008":24570,"2009":34442,"2006":4392,"2005":6057,"2007":9933,"2004":17,"2002":439,"2003":46}},{"snapshot_date":"2026-05","composition":{"2021":75838,"2025":64924,"2018":46067,"2023":71355,"2022":44333,"2020":82388,"2024":69295,"2019":40932,"2015":21574,"2026":30358,"2010":14393,"2016":28123,"2012":53303,"2017":134591,"2014":13881,"2013":19370,"2011":39224,"2008":24569,"2009":34416,"2006":4384,"2005":6057,"2007":9932,"2004":17,"2002":439,"2003":46}}],"fossils":{"genesis":{"timestamp":1009260221,"file":"scipy_test/setup_scipy_test.py","content":"#!/usr/bin/env python","year":"2001","commit":"74a4958","view_commit":"74a4958b94e07b2e3218741fb52d32e87308d62e","line":1},"survivor":{"timestamp":1017446578,"file":"numpy/lib/_polynomial_impl.py","content":"def poly(seq_of_zeros):","year":"2002","commit":"0562713","view_commit":"main","line":40}}} \ No newline at end of file diff --git a/data/react_data.json b/data/react_data.json index 74cb247..0b81d05 100644 --- a/data/react_data.json +++ b/data/react_data.json @@ -1 +1 @@ -{"snapshots":[{"snapshot_date":"2013-06","composition":{"2013":49299}},{"snapshot_date":"2013-09","composition":{"2013":61256}},{"snapshot_date":"2013-12","composition":{"2013":130512,"2014":6}},{"snapshot_date":"2014-03","composition":{"2014":23020,"2013":123687}},{"snapshot_date":"2014-06","composition":{"2013":122493,"2014":30244}},{"snapshot_date":"2014-09","composition":{"2013":120244,"2014":44148}},{"snapshot_date":"2014-12","composition":{"2013":114261,"2014":70090}},{"snapshot_date":"2015-03","composition":{"2014":67640,"2015":84107,"2013":110637}},{"snapshot_date":"2015-06","composition":{"2014":63060,"2013":107612,"2015":103761}},{"snapshot_date":"2015-09","composition":{"2014":61104,"2015":128842,"2013":99577}},{"snapshot_date":"2015-12","composition":{"2014":65919,"2013":97668,"2015":152156}},{"snapshot_date":"2016-03","composition":{"2013":97380,"2015":151704,"2014":59135,"2016":15687}},{"snapshot_date":"2016-06","composition":{"2016":60253,"2013":92596,"2015":144933,"2014":58094}},{"snapshot_date":"2016-09","composition":{"2014":56579,"2016":107146,"2015":127449,"2013":90605}},{"snapshot_date":"2016-12","composition":{"2014":53620,"2016":171100,"2015":93767,"2013":87505}},{"snapshot_date":"2017-03","composition":{"2016":153719,"2017":40564,"2013":85813,"2015":90162,"2014":51722}},{"snapshot_date":"2017-06","composition":{"2016":112177,"2017":100865,"2013":80747,"2015":85364,"2014":49079}},{"snapshot_date":"2017-09","composition":{"2014":42959,"2016":95520,"2017":170284,"2015":78029,"2013":71880}},{"snapshot_date":"2017-12","composition":{"2016":32719,"2017":141395,"2014":3091,"2013":4275,"2015":9115}},{"snapshot_date":"2018-03","composition":{"2017":132926,"2013":4240,"2015":8893,"2014":2840,"2016":31430,"2018":29528}},{"snapshot_date":"2018-06","composition":{"2016":28845,"2017":125268,"2018":50483,"2015":8742,"2013":4144,"2014":2801}},{"snapshot_date":"2018-09","composition":{"2017":121415,"2018":89215,"2016":27784,"2013":4030,"2015":8679,"2014":2750}},{"snapshot_date":"2018-12","composition":{"2017":119427,"2018":110439,"2016":27687,"2014":2745,"2013":4028,"2015":8663}},{"snapshot_date":"2019-03","composition":{"2019":27130}},{"snapshot_date":"2019-06","composition":{"2017":117513,"2015":8651,"2016":26894,"2018":88585,"2019":65595,"2013":4006,"2014":2733}},{"snapshot_date":"2019-09","composition":{"2017":116862,"2019":154937,"2018":85418,"2016":26444,"2015":8633,"2013":4006,"2014":2731}},{"snapshot_date":"2019-12","composition":{"2017":115372,"2019":178970,"2018":80928,"2016":26181,"2015":8651,"2014":2731,"2013":4002}},{"snapshot_date":"2020-03","composition":{"2016":25817,"2018":75202,"2020":42887,"2017":112448,"2019":158206,"2013":3752,"2015":8488,"2014":2625}},{"snapshot_date":"2020-06","composition":{"2019":149670,"2020":109834,"2016":24821,"2018":74006,"2017":111520,"2015":8475,"2014":2635,"2013":3705}},{"snapshot_date":"2020-09","composition":{"2019":132657,"2020":140316,"2017":109379,"2018":71429,"2016":24599,"2015":8365,"2014":2109,"2013":3381}},{"snapshot_date":"2020-12","composition":{"2019":127254,"2020":182464,"2017":106722,"2018":71568,"2013":3381,"2015":8365,"2014":2109,"2016":24570}},{"snapshot_date":"2021-03","composition":{"2019":122222,"2021":25610,"2020":172383,"2017":106608,"2018":70547,"2016":24558,"2015":8363,"2013":3381,"2014":2109}},{"snapshot_date":"2021-06","composition":{"2019":116227,"2021":70698,"2017":104705,"2020":162225,"2018":64628,"2016":24421,"2015":8330,"2014":2097,"2013":3366}},{"snapshot_date":"2021-09","composition":{"2020":153417,"2019":114206,"2021":117474,"2017":104170,"2018":63959,"2015":8185,"2016":24311,"2014":2047,"2013":3347}},{"snapshot_date":"2021-12","composition":{"2021":136954,"2017":103766,"2019":112937,"2020":150902,"2018":63738,"2014":2047,"2016":24244,"2015":8119,"2013":3347}},{"snapshot_date":"2022-03","composition":{"2019":107105,"2021":130490,"2020":147765,"2017":103594,"2015":8093,"2016":24233,"2022":24396,"2018":63526,"2014":2036,"2013":3347}},{"snapshot_date":"2022-06","composition":{"2020":145774,"2019":105925,"2021":127685,"2017":103566,"2022":49310,"2018":62370,"2015":8081,"2016":24217,"2014":2036,"2013":3343}},{"snapshot_date":"2022-09","composition":{"2016":24202,"2018":61123,"2020":139446,"2017":102772,"2019":100139,"2021":124382,"2022":81289,"2014":2036,"2013":3343,"2015":8069}},{"snapshot_date":"2022-12","composition":{"2019":97166,"2021":116918,"2017":101538,"2022":90700,"2020":119753,"2018":59933,"2015":8036,"2016":24147,"2014":2030,"2013":3308}},{"snapshot_date":"2023-03","composition":{"2019":92349,"2021":109360,"2023":49082,"2020":98523,"2014":1943,"2017":97452,"2022":74008,"2016":23525,"2018":57598,"2013":3211,"2015":7685}},{"snapshot_date":"2023-06","composition":{"2019":90609,"2021":105316,"2023":71271,"2017":96874,"2022":71044,"2020":95370,"2016":23444,"2018":57130,"2015":7678,"2014":1942,"2013":3203}},{"snapshot_date":"2023-09","composition":{"2020":94881,"2019":89683,"2021":104279,"2023":89091,"2017":96761,"2022":70205,"2016":23437,"2018":56929,"2014":1942,"2015":7678,"2013":3203}},{"snapshot_date":"2023-12","composition":{"2023":322197,"2021":58572,"2022":9860}},{"snapshot_date":"2024-03","composition":{"2019":78286,"2021":98780,"2023":91993,"2017":92511,"2015":6865,"2016":21601,"2020":89985,"2013":2982,"2014":1611,"2024":56403,"2018":52282,"2022":64120}},{"snapshot_date":"2024-06","composition":{"2024":146695,"2021":109652,"2017":91402,"2019":76480,"2022":69399,"2023":387233,"2020":86381,"2013":2827,"2015":6451,"2014":1434,"2016":21189,"2018":51177}},{"snapshot_date":"2024-09","composition":{"2016":21077,"2018":50727,"2020":84491,"2022":66008,"2024":207309,"2017":91319,"2019":73735,"2021":106604,"2023":372741,"2014":1434,"2015":6450,"2013":2827}},{"snapshot_date":"2024-12","composition":{"2020":83431,"2021":105682,"2024":235398,"2016":21010,"2018":49947,"2022":64957,"2017":91080,"2019":72870,"2023":369720,"2015":6409,"2013":2798,"2014":1424}},{"snapshot_date":"2025-01","composition":{"2024":228979,"2019":72002,"2021":105295,"2023":146990,"2020":81905,"2017":90754,"2015":6344,"2016":20945,"2013":2794,"2014":1412,"2018":48950,"2022":64653,"2025":30519}},{"snapshot_date":"2025-02","composition":{"2024":226806,"2019":70686,"2021":105235,"2023":145909,"2020":81740,"2017":90722,"2015":6341,"2016":20945,"2018":48854,"2022":64519,"2025":43401,"2013":2793,"2014":1412}},{"snapshot_date":"2025-03","composition":{"2024":221556,"2019":70526,"2021":105086,"2023":144980,"2020":81491,"2017":90713,"2015":6341,"2016":20943,"2018":48777,"2022":63742,"2025":76772,"2013":2793,"2014":1412}},{"snapshot_date":"2025-04","composition":{"2024":220576,"2019":70454,"2025":94566,"2021":103830,"2023":143787,"2020":81428,"2017":90707,"2015":6341,"2016":20943,"2018":48770,"2022":63508,"2014":1412,"2013":2793}},{"snapshot_date":"2025-05","composition":{"2024":218220,"2019":70363,"2025":112419,"2021":103777,"2023":142803,"2020":81289,"2017":90702,"2015":6341,"2016":20942,"2018":48749,"2022":63378,"2014":1412,"2013":2793}},{"snapshot_date":"2025-06","composition":{"2024":215878,"2019":70222,"2025":137311,"2021":103737,"2023":142387,"2017":90701,"2015":6341,"2016":20939,"2020":81261,"2018":48706,"2022":63159,"2013":2793,"2014":1412}},{"snapshot_date":"2025-07","composition":{"2024":212777,"2019":69634,"2025":153753,"2021":103393,"2023":141860,"2020":81061,"2017":90690,"2015":6334,"2016":20929,"2014":1412,"2013":2793,"2018":48343,"2022":62740}},{"snapshot_date":"2025-08","composition":{"2024":209351,"2019":69500,"2025":185075,"2021":103254,"2020":80847,"2016":20929,"2018":48330,"2022":62013,"2017":90685,"2023":140410,"2014":1412,"2015":6334,"2013":2793}},{"snapshot_date":"2025-09","composition":{"2024":208280,"2019":69355,"2025":199735,"2021":103075,"2020":80758,"2017":90684,"2015":6334,"2016":20929,"2014":1412,"2018":48324,"2022":61753,"2023":139982,"2013":2793}},{"snapshot_date":"2025-10","composition":{"2024":207431,"2019":69272,"2025":209366,"2021":103037,"2020":80677,"2017":90684,"2015":6334,"2016":20929,"2013":2793,"2014":1412,"2018":48322,"2022":61695,"2023":139651}},{"snapshot_date":"2025-11","composition":{"2019":69017,"2025":220684,"2021":103016,"2024":205775,"2017":90652,"2015":6334,"2016":20915,"2020":80599,"2013":2793,"2014":1412,"2018":48289,"2022":61679,"2023":136904}},{"snapshot_date":"2025-12","composition":{"2024":205425,"2019":68968,"2025":226961,"2021":103003,"2020":80579,"2017":90637,"2015":4434,"2016":20915,"2013":2793,"2014":1412,"2018":48289,"2022":61658,"2023":136615}},{"snapshot_date":"2026-01","composition":{"2026":19721,"2024":203679,"2019":68844,"2025":223578,"2021":102930,"2020":80546,"2017":90634,"2015":4433,"2016":20907,"2013":2793,"2014":1412,"2018":48210,"2022":61557,"2023":136219}},{"snapshot_date":"2026-02","composition":{"2026":33082,"2024":195521,"2019":68808,"2025":215585,"2021":102920,"2020":80409,"2014":1412,"2013":2792,"2015":4433,"2016":20907,"2018":48210,"2022":61380,"2017":90634,"2023":134343}},{"snapshot_date":"2026-03","composition":{"2026":38141,"2024":195277,"2019":68486,"2025":215136,"2021":102785,"2020":80280,"2017":90630,"2015":4433,"2016":20907,"2013":2792,"2014":1412,"2018":48104,"2022":61321,"2023":134227}},{"snapshot_date":"2026-04","composition":{"2026":45339,"2024":194885,"2019":68005,"2025":214779,"2021":102512,"2017":89469,"2015":4433,"2016":20263,"2020":79804,"2018":47417,"2022":61254,"2023":133996,"2014":1412,"2013":2792}},{"snapshot_date":"2026-05","composition":{"2026":48526,"2024":194575,"2019":67825,"2025":214195,"2021":102171,"2020":79676,"2017":89431,"2015":4433,"2016":20263,"2018":46991,"2022":61249,"2023":133881,"2014":1412,"2013":2792}}],"fossils":{"genesis":{"timestamp":1369771850,"file":"vendor/jasmine/HtmlReporter.js","content":"var jasmine = require(\"./jasmine\");","year":"2013","commit":"f8af932","view_commit":"f8af93237adaa7c02df9edcbfccd07e6fdaaa0ed","line":1},"survivor":{"timestamp":1369856771,"file":".editorconfig","content":"root = true","year":"2013","commit":"75897c2","view_commit":"main","line":2}}} \ No newline at end of file +{"snapshots":[{"snapshot_date":"2013-06","composition":{"2013":49299}},{"snapshot_date":"2013-09","composition":{"2013":61256}},{"snapshot_date":"2013-12","composition":{"2013":130512}},{"snapshot_date":"2014-03","composition":{"2014":23020,"2013":123687}},{"snapshot_date":"2014-06","composition":{"2013":122493,"2014":30244}},{"snapshot_date":"2014-09","composition":{"2013":120244,"2014":44148}},{"snapshot_date":"2014-12","composition":{"2013":114261,"2014":70090}},{"snapshot_date":"2015-03","composition":{"2014":67640,"2015":84107,"2013":110637}},{"snapshot_date":"2015-06","composition":{"2014":63060,"2013":107612,"2015":103761}},{"snapshot_date":"2015-09","composition":{"2014":61104,"2015":128842,"2013":99577}},{"snapshot_date":"2015-12","composition":{"2014":65919,"2013":97668,"2015":152156}},{"snapshot_date":"2016-03","composition":{"2013":97380,"2015":151704,"2014":59135,"2016":15687}},{"snapshot_date":"2016-06","composition":{"2016":60253,"2013":92596,"2015":144933,"2014":58094}},{"snapshot_date":"2016-09","composition":{"2014":56579,"2016":107146,"2015":127449,"2013":90605}},{"snapshot_date":"2016-12","composition":{"2014":53620,"2016":171100,"2015":93767,"2013":87505}},{"snapshot_date":"2017-03","composition":{"2016":153719,"2017":40564,"2013":85813,"2015":90162,"2014":51722}},{"snapshot_date":"2017-06","composition":{"2016":112177,"2017":100865,"2013":80747,"2015":85364,"2014":49079}},{"snapshot_date":"2017-09","composition":{"2014":42959,"2016":95520,"2017":170284,"2015":78029,"2013":71880}},{"snapshot_date":"2017-12","composition":{"2016":32719,"2017":141395,"2014":3091,"2013":4275,"2015":9115}},{"snapshot_date":"2018-03","composition":{"2017":132926,"2013":4240,"2015":8893,"2014":2840,"2016":31430,"2018":29528}},{"snapshot_date":"2018-06","composition":{"2016":28845,"2017":125268,"2018":50483,"2015":8742,"2013":4144,"2014":2801}},{"snapshot_date":"2018-09","composition":{"2017":121415,"2018":89215,"2016":27784,"2013":4030,"2015":8679,"2014":2750}},{"snapshot_date":"2018-12","composition":{"2017":119427,"2018":110439,"2016":27687,"2014":2745,"2013":4028,"2015":8663}},{"snapshot_date":"2019-03","composition":{"2019":27130}},{"snapshot_date":"2019-06","composition":{"2017":117513,"2015":8651,"2016":26894,"2018":88585,"2019":65595,"2013":4006,"2014":2733}},{"snapshot_date":"2019-09","composition":{"2017":116862,"2019":154937,"2018":85418,"2016":26444,"2015":8633,"2013":4006,"2014":2731}},{"snapshot_date":"2019-12","composition":{"2017":115372,"2019":178970,"2018":80928,"2016":26181,"2015":8651,"2014":2731,"2013":4002}},{"snapshot_date":"2020-03","composition":{"2016":25817,"2018":75202,"2020":42887,"2017":112448,"2019":158206,"2013":3752,"2015":8488,"2014":2625}},{"snapshot_date":"2020-06","composition":{"2019":149670,"2020":109834,"2016":24821,"2018":74006,"2017":111520,"2015":8475,"2014":2635,"2013":3705}},{"snapshot_date":"2020-09","composition":{"2019":132657,"2020":140316,"2017":109379,"2018":71429,"2016":24599,"2015":8365,"2014":2109,"2013":3381}},{"snapshot_date":"2020-12","composition":{"2019":127254,"2020":182464,"2017":106722,"2018":71568,"2013":3381,"2015":8365,"2014":2109,"2016":24570}},{"snapshot_date":"2021-03","composition":{"2019":122222,"2021":25610,"2020":172383,"2017":106608,"2018":70547,"2016":24558,"2015":8363,"2013":3381,"2014":2109}},{"snapshot_date":"2021-06","composition":{"2019":116227,"2021":70698,"2017":104705,"2020":162225,"2018":64628,"2016":24421,"2015":8330,"2014":2097,"2013":3366}},{"snapshot_date":"2021-09","composition":{"2020":153417,"2019":114206,"2021":117474,"2017":104170,"2018":63959,"2015":8185,"2016":24311,"2014":2047,"2013":3347}},{"snapshot_date":"2021-12","composition":{"2021":136954,"2017":103766,"2019":112937,"2020":150902,"2018":63738,"2014":2047,"2016":24244,"2015":8119,"2013":3347}},{"snapshot_date":"2022-03","composition":{"2019":107105,"2021":130490,"2020":147765,"2017":103594,"2015":8093,"2016":24233,"2022":24396,"2018":63526,"2014":2036,"2013":3347}},{"snapshot_date":"2022-06","composition":{"2020":145774,"2019":105925,"2021":127685,"2017":103566,"2022":49310,"2018":62370,"2015":8081,"2016":24217,"2014":2036,"2013":3343}},{"snapshot_date":"2022-09","composition":{"2016":24202,"2018":61123,"2020":139446,"2017":102772,"2019":100139,"2021":124382,"2022":81289,"2014":2036,"2013":3343,"2015":8069}},{"snapshot_date":"2022-12","composition":{"2019":97166,"2021":116918,"2017":101538,"2022":90700,"2020":119753,"2018":59933,"2015":8036,"2016":24147,"2014":2030,"2013":3308}},{"snapshot_date":"2023-03","composition":{"2019":92349,"2021":109360,"2023":49082,"2020":98523,"2014":1943,"2017":97452,"2022":74008,"2016":23525,"2018":57598,"2013":3211,"2015":7685}},{"snapshot_date":"2023-06","composition":{"2019":90609,"2021":105316,"2023":71271,"2017":96874,"2022":71044,"2020":95370,"2016":23444,"2018":57130,"2015":7678,"2014":1942,"2013":3203}},{"snapshot_date":"2023-09","composition":{"2020":94881,"2019":89683,"2021":104279,"2023":89091,"2017":96761,"2022":70205,"2016":23437,"2018":56929,"2014":1942,"2015":7678,"2013":3203}},{"snapshot_date":"2023-12","composition":{"2023":322197,"2021":58572,"2022":9860}},{"snapshot_date":"2024-03","composition":{"2019":78286,"2021":98780,"2023":91993,"2017":92511,"2015":6865,"2016":21601,"2020":89985,"2013":2982,"2014":1611,"2024":56403,"2018":52282,"2022":64120}},{"snapshot_date":"2024-06","composition":{"2024":146695,"2021":109652,"2017":91402,"2019":76480,"2022":69399,"2023":387233,"2020":86381,"2013":2827,"2015":6451,"2014":1434,"2016":21189,"2018":51177}},{"snapshot_date":"2024-09","composition":{"2016":21077,"2018":50727,"2020":84491,"2022":66008,"2024":207309,"2017":91319,"2019":73735,"2021":106604,"2023":372741,"2014":1434,"2015":6450,"2013":2827}},{"snapshot_date":"2024-12","composition":{"2020":83431,"2021":105682,"2024":235398,"2016":21010,"2018":49947,"2022":64957,"2017":91080,"2019":72870,"2023":369720,"2015":6409,"2013":2798,"2014":1424}},{"snapshot_date":"2025-01","composition":{"2024":228979,"2019":72002,"2021":105295,"2023":146990,"2020":81905,"2017":90754,"2015":6344,"2016":20945,"2013":2794,"2014":1412,"2018":48950,"2022":64653,"2025":30519}},{"snapshot_date":"2025-02","composition":{"2024":226806,"2019":70686,"2021":105235,"2023":145909,"2020":81740,"2017":90722,"2015":6341,"2016":20945,"2018":48854,"2022":64519,"2025":43401,"2013":2793,"2014":1412}},{"snapshot_date":"2025-03","composition":{"2024":221556,"2019":70526,"2021":105086,"2023":144980,"2020":81491,"2017":90713,"2015":6341,"2016":20943,"2018":48777,"2022":63742,"2025":76772,"2013":2793,"2014":1412}},{"snapshot_date":"2025-04","composition":{"2024":220576,"2019":70454,"2025":94566,"2021":103830,"2023":143787,"2020":81428,"2017":90707,"2015":6341,"2016":20943,"2018":48770,"2022":63508,"2014":1412,"2013":2793}},{"snapshot_date":"2025-05","composition":{"2024":218220,"2019":70363,"2025":112419,"2021":103777,"2023":142803,"2020":81289,"2017":90702,"2015":6341,"2016":20942,"2018":48749,"2022":63378,"2014":1412,"2013":2793}},{"snapshot_date":"2025-06","composition":{"2024":215878,"2019":70222,"2025":137311,"2021":103737,"2023":142387,"2017":90701,"2015":6341,"2016":20939,"2020":81261,"2018":48706,"2022":63159,"2013":2793,"2014":1412}},{"snapshot_date":"2025-07","composition":{"2024":212777,"2019":69634,"2025":153753,"2021":103393,"2023":141860,"2020":81061,"2017":90690,"2015":6334,"2016":20929,"2014":1412,"2013":2793,"2018":48343,"2022":62740}},{"snapshot_date":"2025-08","composition":{"2024":209351,"2019":69500,"2025":185075,"2021":103254,"2020":80847,"2016":20929,"2018":48330,"2022":62013,"2017":90685,"2023":140410,"2014":1412,"2015":6334,"2013":2793}},{"snapshot_date":"2025-09","composition":{"2024":208280,"2019":69355,"2025":199735,"2021":103075,"2020":80758,"2017":90684,"2015":6334,"2016":20929,"2014":1412,"2018":48324,"2022":61753,"2023":139982,"2013":2793}},{"snapshot_date":"2025-10","composition":{"2024":207431,"2019":69272,"2025":209366,"2021":103037,"2020":80677,"2017":90684,"2015":6334,"2016":20929,"2013":2793,"2014":1412,"2018":48322,"2022":61695,"2023":139651}},{"snapshot_date":"2025-11","composition":{"2019":69017,"2025":220684,"2021":103016,"2024":205775,"2017":90652,"2015":6334,"2016":20915,"2020":80599,"2013":2793,"2014":1412,"2018":48289,"2022":61679,"2023":136904}},{"snapshot_date":"2025-12","composition":{"2024":205425,"2019":68968,"2025":226961,"2021":103003,"2020":80579,"2017":90637,"2015":4434,"2016":20915,"2013":2793,"2014":1412,"2018":48289,"2022":61658,"2023":136615}},{"snapshot_date":"2026-01","composition":{"2026":19721,"2024":203679,"2019":68844,"2025":223578,"2021":102930,"2020":80546,"2017":90634,"2015":4433,"2016":20907,"2013":2793,"2014":1412,"2018":48210,"2022":61557,"2023":136219}},{"snapshot_date":"2026-02","composition":{"2026":33082,"2024":195521,"2019":68808,"2025":215585,"2021":102920,"2020":80409,"2014":1412,"2013":2792,"2015":4433,"2016":20907,"2018":48210,"2022":61380,"2017":90634,"2023":134343}},{"snapshot_date":"2026-03","composition":{"2026":38141,"2024":195277,"2019":68486,"2025":215136,"2021":102785,"2020":80280,"2017":90630,"2015":4433,"2016":20907,"2013":2792,"2014":1412,"2018":48104,"2022":61321,"2023":134227}},{"snapshot_date":"2026-04","composition":{"2026":45339,"2024":194885,"2019":68005,"2025":214779,"2021":102512,"2017":89469,"2015":4433,"2016":20263,"2020":79804,"2018":47417,"2022":61254,"2023":133996,"2014":1412,"2013":2792}},{"snapshot_date":"2026-05","composition":{"2026":48526,"2024":194575,"2019":67825,"2025":214195,"2021":102171,"2020":79676,"2017":89431,"2015":4433,"2016":20263,"2018":46991,"2022":61249,"2023":133881,"2014":1412,"2013":2792}}],"fossils":{"genesis":{"timestamp":1369771850,"file":"vendor/jasmine/HtmlReporter.js","content":"var jasmine = require(\"./jasmine\");","year":"2013","commit":"f8af932","view_commit":"f8af93237adaa7c02df9edcbfccd07e6fdaaa0ed","line":1},"survivor":{"timestamp":1369856771,"file":".editorconfig","content":"root = true","year":"2013","commit":"75897c2","view_commit":"main","line":2}}} \ No newline at end of file diff --git a/data/zed_data.json b/data/zed_data.json index 2a52555..48fa067 100644 --- a/data/zed_data.json +++ b/data/zed_data.json @@ -1 +1 @@ -{"snapshots":[{"snapshot_date":"2021-03","composition":{"2021":25386}},{"snapshot_date":"2021-06","composition":{"2021":44898}},{"snapshot_date":"2021-09","composition":{"2021":76985}},{"snapshot_date":"2021-12","composition":{"2021":94004}},{"snapshot_date":"2022-03","composition":{"2021":73693,"2022":388255,"2026":4}},{"snapshot_date":"2022-06","composition":{"2021":51112,"2022":439373,"2026":4}},{"snapshot_date":"2022-09","composition":{"2021":47923,"2022":461986,"2026":4}},{"snapshot_date":"2022-12","composition":{"2022":491696,"2021":42941,"2026":4}},{"snapshot_date":"2023-03","composition":{"2022":476272,"2023":50012,"2021":39796,"2026":4}},{"snapshot_date":"2023-06","composition":{"2023":113736,"2022":453849,"2021":36084,"2026":4}},{"snapshot_date":"2023-09","composition":{"2023":201675,"2022":444799,"2021":35314,"2026":4}},{"snapshot_date":"2023-12","composition":{"2023":568550,"2022":439807,"2021":34913,"2026":8}},{"snapshot_date":"2024-03","composition":{"2024":190937,"2023":147177,"2022":411340,"2021":22875,"2026":113}},{"snapshot_date":"2024-06","composition":{"2024":309661,"2023":130643,"2022":66647,"2021":21876,"2026":151}},{"snapshot_date":"2024-09","composition":{"2024":441405,"2023":116364,"2021":20941,"2022":61998,"2026":162}},{"snapshot_date":"2024-12","composition":{"2024":522212,"2023":111939,"2021":19618,"2022":58843,"2026":170}},{"snapshot_date":"2025-01","composition":{"2025":87396,"2024":473046,"2023":106560,"2022":56478,"2021":19030,"2026":175}},{"snapshot_date":"2025-02","composition":{"2025":136662,"2024":445718,"2023":104630,"2021":18947,"2022":55697,"2026":182}},{"snapshot_date":"2025-03","composition":{"2025":232121,"2024":420152,"2023":100255,"2022":53002,"2021":18700,"2026":182}},{"snapshot_date":"2025-04","composition":{"2026":189,"2025":295774,"2024":405727,"2023":98899,"2022":51571,"2021":18379}},{"snapshot_date":"2025-05","composition":{"2026":189,"2025":383631,"2024":389034,"2023":97957,"2022":51077,"2021":18342}},{"snapshot_date":"2025-06","composition":{"2026":194,"2025":435521,"2024":381915,"2023":96914,"2021":18310,"2022":50493}},{"snapshot_date":"2025-07","composition":{"2026":205,"2025":505702,"2024":370960,"2023":96086,"2021":18028,"2022":50134}},{"snapshot_date":"2025-08","composition":{"2024":342196,"2025":587373,"2023":94193,"2026":210,"2022":49275,"2021":17791}},{"snapshot_date":"2025-09","composition":{"2026":217,"2025":647340,"2024":322913,"2023":89361,"2022":48437,"2021":17312}},{"snapshot_date":"2025-10","composition":{"2026":216,"2025":785509,"2024":313682,"2023":87171,"2022":47531,"2021":16776}},{"snapshot_date":"2025-11","composition":{"2026":216,"2025":829198,"2024":308252,"2023":85866,"2022":46995,"2021":16515}},{"snapshot_date":"2025-12","composition":{"2024":303394,"2026":3260,"2025":878595,"2023":84991,"2022":46559,"2021":16345}},{"snapshot_date":"2026-01","composition":{"2025":827305,"2024":296062,"2023":82999,"2026":155530,"2022":46183,"2021":16051}},{"snapshot_date":"2026-02","composition":{"2025":792544,"2024":287028,"2023":81769,"2026":287243,"2022":44769,"2021":15713}},{"snapshot_date":"2026-03","composition":{"2025":759242,"2024":272004,"2023":80746,"2026":424335,"2022":44357,"2021":15498}},{"snapshot_date":"2026-04","composition":{"2024":267533,"2025":738032,"2026":536594,"2023":78982,"2022":43488,"2021":15278}},{"snapshot_date":"2026-05","composition":{"2026":647040,"2025":678530,"2024":262431,"2023":77869,"2022":41743,"2021":15214}}],"fossils":{"genesis":{"timestamp":1613862336,"file":"gpui/src/executor.rs","content":"// #[cfg(not(test))]","year":"2021","commit":"222f9d3","view_commit":"222f9d373df677d7c5f8427984b4206f36f53a2a","line":1},"survivor":{"timestamp":1613840554,"file":"Cargo.toml","content":"[workspace]","year":"2021","commit":"b400449","view_commit":"main","line":1}}} \ No newline at end of file +{"snapshots":[{"snapshot_date":"2021-03","composition":{"2021":25386}},{"snapshot_date":"2021-06","composition":{"2021":44898}},{"snapshot_date":"2021-09","composition":{"2021":76985}},{"snapshot_date":"2021-12","composition":{"2021":94004}},{"snapshot_date":"2022-03","composition":{"2021":73693,"2022":388255}},{"snapshot_date":"2022-06","composition":{"2021":51112,"2022":439373}},{"snapshot_date":"2022-09","composition":{"2021":47923,"2022":461986}},{"snapshot_date":"2022-12","composition":{"2022":491696,"2021":42941}},{"snapshot_date":"2023-03","composition":{"2022":476272,"2023":50012,"2021":39796}},{"snapshot_date":"2023-06","composition":{"2023":113736,"2022":453849,"2021":36084}},{"snapshot_date":"2023-09","composition":{"2023":201675,"2022":444799,"2021":35314}},{"snapshot_date":"2023-12","composition":{"2023":568550,"2022":439807,"2021":34913}},{"snapshot_date":"2024-03","composition":{"2024":190937,"2023":147177,"2022":411340,"2021":22875}},{"snapshot_date":"2024-06","composition":{"2024":309661,"2023":130643,"2022":66647,"2021":21876}},{"snapshot_date":"2024-09","composition":{"2024":441405,"2023":116364,"2021":20941,"2022":61998}},{"snapshot_date":"2024-12","composition":{"2024":522212,"2023":111939,"2021":19618,"2022":58843}},{"snapshot_date":"2025-01","composition":{"2025":87396,"2024":473046,"2023":106560,"2022":56478,"2021":19030}},{"snapshot_date":"2025-02","composition":{"2025":136662,"2024":445718,"2023":104630,"2021":18947,"2022":55697}},{"snapshot_date":"2025-03","composition":{"2025":232121,"2024":420152,"2023":100255,"2022":53002,"2021":18700}},{"snapshot_date":"2025-04","composition":{"2025":295774,"2024":405727,"2023":98899,"2022":51571,"2021":18379}},{"snapshot_date":"2025-05","composition":{"2025":383631,"2024":389034,"2023":97957,"2022":51077,"2021":18342}},{"snapshot_date":"2025-06","composition":{"2025":435521,"2024":381915,"2023":96914,"2021":18310,"2022":50493}},{"snapshot_date":"2025-07","composition":{"2025":505702,"2024":370960,"2023":96086,"2021":18028,"2022":50134}},{"snapshot_date":"2025-08","composition":{"2024":342196,"2025":587373,"2023":94193,"2022":49275,"2021":17791}},{"snapshot_date":"2025-09","composition":{"2025":647340,"2024":322913,"2023":89361,"2022":48437,"2021":17312}},{"snapshot_date":"2025-10","composition":{"2025":785509,"2024":313682,"2023":87171,"2022":47531,"2021":16776}},{"snapshot_date":"2025-11","composition":{"2025":829198,"2024":308252,"2023":85866,"2022":46995,"2021":16515}},{"snapshot_date":"2025-12","composition":{"2024":303394,"2025":878595,"2023":84991,"2022":46559,"2021":16345}},{"snapshot_date":"2026-01","composition":{"2025":827305,"2024":296062,"2023":82999,"2026":155530,"2022":46183,"2021":16051}},{"snapshot_date":"2026-02","composition":{"2025":792544,"2024":287028,"2023":81769,"2026":287243,"2022":44769,"2021":15713}},{"snapshot_date":"2026-03","composition":{"2025":759242,"2024":272004,"2023":80746,"2026":424335,"2022":44357,"2021":15498}},{"snapshot_date":"2026-04","composition":{"2024":267533,"2025":738032,"2026":536594,"2023":78982,"2022":43488,"2021":15278}},{"snapshot_date":"2026-05","composition":{"2026":647040,"2025":678530,"2024":262431,"2023":77869,"2022":41743,"2021":15214}}],"fossils":{"genesis":{"timestamp":1613862336,"file":"gpui/src/executor.rs","content":"// #[cfg(not(test))]","year":"2021","commit":"222f9d3","view_commit":"222f9d373df677d7c5f8427984b4206f36f53a2a","line":1},"survivor":{"timestamp":1613840554,"file":"Cargo.toml","content":"[workspace]","year":"2021","commit":"b400449","view_commit":"main","line":1}}} \ No newline at end of file diff --git a/scripts/analyse_repository.py b/scripts/analyse_repository.py index c5e0c45..f456e55 100644 --- a/scripts/analyse_repository.py +++ b/scripts/analyse_repository.py @@ -151,6 +151,12 @@ def analyze_snapshots(repo_path: str, commit_hash: str) -> dict[str, int]: except ValueError: pass + logger.info(" Blaming %d valid files (%d workers)...", len(valid_files), max_workers) + + total_files = len(valid_files) + completed = 0 + next_log_pct = 10 + with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: future_to_file = { executor.submit(_blame_single_file, repo_path, file): file @@ -162,6 +168,12 @@ def analyze_snapshots(repo_path: str, commit_hash: str) -> dict[str, int]: for year, count in file_dist.items(): age_distribution[year] += count + completed += 1 + pct = completed / total_files * 100 + if pct >= next_log_pct: + logger.info(" Blame progress: %d/%d (%.0f%%)", completed, total_files, pct) + next_log_pct += 10 + return dict(age_distribution) From 0f264acbaf6f5e5f5506756f1d5fb46bdf4be7e2 Mon Sep 17 00:00:00 2001 From: Asif Sayyed Date: Sun, 31 May 2026 17:41:56 +0530 Subject: [PATCH 7/9] #33 made code more modular and updated workflow --- .github/workflows/integration-tests.yml | 6 +- .github/workflows/theseus-engine.yml | 27 +- scripts/_blame.py | 267 ++++++++++++++++ scripts/_data_io.py | 92 ++++++ scripts/_utils.py | 20 +- scripts/add_fossils.py | 401 ++++++++---------------- scripts/analyse_repository.py | 282 ++++++----------- scripts/cleanup_data.py | 68 ++-- scripts/run_pipeline.py | 212 +++++++++++++ tests/test_analyse_repository.py | 37 +-- 10 files changed, 896 insertions(+), 516 deletions(-) create mode 100644 scripts/_blame.py create mode 100644 scripts/_data_io.py create mode 100644 scripts/run_pipeline.py diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index 6b4afe5..7670600 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -1,4 +1,4 @@ -name: Integration Tests +name: Unit Tests on: push: @@ -8,7 +8,7 @@ on: jobs: test: - name: Run Integration Tests + name: Run Unit Tests runs-on: ubuntu-latest steps: @@ -24,7 +24,7 @@ jobs: run: pipx install poetry - name: Install dependencies - run: poetry install --no-root --with dev + run: poetry install --with dev - name: Run tests run: poetry run pytest tests/ -v --tb=short diff --git a/.github/workflows/theseus-engine.yml b/.github/workflows/theseus-engine.yml index 5273235..e256e91 100644 --- a/.github/workflows/theseus-engine.yml +++ b/.github/workflows/theseus-engine.yml @@ -16,6 +16,7 @@ jobs: uses: actions/checkout@v4 with: token: ${{ secrets.GITHUB_TOKEN }} + fetch-depth: 0 - name: Setup python 3.12 uses: actions/setup-python@v5 @@ -23,28 +24,20 @@ jobs: python-version: "3.12" - name: Install poetry - uses: snok/install-poetry@v1 - with: - virtualenvs-create: true - virtualenvs-in-project: true + run: pipx install poetry - name: Install dependencies run: poetry install --no-interaction --no-root - - name: Run theseus multi-repo analysis (delta load) - run: poetry run python scripts/analyse_repository.py - - - name: Update living fossils (survivor check) + - name: Run theseus data pipeline (snapshots → survivor → cleanup) run: | - # Re-blame HEAD for every repo and update the survivor fossil only if - # the file:line:commit has actually changed since the last run. - # Genesis (historical fossil) is left completely untouched. - poetry run python scripts/add_fossils.py --update-survivor - - - name: Clean & minify data payloads - run: poetry run python scripts/cleanup_data.py + # Analyse new snapshot periods, refresh survivor fossils, and clean/minify + # all data payloads. Genesis (historical fossil) is left untouched + # during monthly cron runs. + poetry run python scripts/run_pipeline.py --update-survivor - name: Create pull request for data updates + if: success() uses: peter-evans/create-pull-request@v6 with: token: ${{ secrets.GITHUB_TOKEN }} @@ -55,9 +48,9 @@ jobs: title: "chore: monthly theseus data pipeline update" body: | ## Automated Theseus Data Engine Run - + This pull request contains the latest pre-computed persistence data, minified payloads, and fossil updates for the tracked repositories. - + **Trigger:** Monthly Schedule / Workflow Dispatch **Action:** Deltas calculated and fossils verified. labels: "automated pr, data update" diff --git a/scripts/_blame.py b/scripts/_blame.py new file mode 100644 index 0000000..393b0c5 --- /dev/null +++ b/scripts/_blame.py @@ -0,0 +1,267 @@ +""" +Shared git blame infrastructure for the Theseus pipeline. + +Parses ``git blame --line-porcelain`` output and dispatches parallel blame +across file lists. Two post-processing modes are exposed: + +* ``parse_blame_year_counts``: aggregate lines per author-year for snapshot + analysis (used by ``analyse_repository.py``). +* ``find_oldest_fossil_in_blame``: find the single oldest-authored line in a + file's blame output (used by ``add_fossils.py``). + +Fossil data model +----------------- +A **fossil** is a single source-code line whose author-timestamp is the +oldest ever found in a given scope. Each fossil records: + +``timestamp`` + Unix-epoch author-time. +``file`` + Relative file path. +``content`` + The actual source line text. +``year`` + 4-digit year derived from ``timestamp``. +``commit`` + First 7 characters of the commit hash that last modified this line. +``view_commit`` + The git ref (commit hash or branch name) at which the file is checked out. +``line`` + 1-based line number within the file. + +Two concrete fossil types are discovered by the pipeline: + + **Genesis** — the single oldest-authored line *ever* to exist in the repo. + Found by blaming only the files added in each of the earliest commits + (sorted by author-time), scanning until ``stale_limit`` consecutive commits + fail to improve the oldest-yet result. + + **Survivor** — the single oldest-authored line that *still exists* at the + current HEAD. Found by blaming every tracked file on the default branch. +""" + +import concurrent.futures +import logging +import os +import sys +from collections import defaultdict +from datetime import datetime, timezone +from pathlib import Path + +# Ensure sibling imports work in all invocation contexts +_SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__)) +if _SCRIPTS_DIR not in sys.path: + sys.path.insert(0, _SCRIPTS_DIR) + +from _utils import run_command + +logger = logging.getLogger(__name__) + + +# Fossil helper +def _blank_fossil() -> dict: + """Return a blank fossil dict with the maximum possible timestamp.""" + return { + "timestamp": 2_147_483_647, + "file": "", + "content": "", + "year": "", + "commit": "", + "view_commit": "", + "line": 0, + } + + +# Single-file blame +def blame_single_file(repo_path: str | Path, file_path: str) -> str: + """ + Run ``git blame --line-porcelain`` on a single file and return raw output. + + :param repo_path: Path to the git repository. + :param file_path: Relative path of the file to blame. + :return: Raw ``--line-porcelain`` output, or empty string on failure. + """ + try: + return run_command( + ["git", "blame", "--line-porcelain", file_path], + cwd=str(repo_path), + ) + except RuntimeError: + return "" + + +# Post-processing: year-count mode (for snapshot analysis) +def parse_blame_year_counts(raw_output: str) -> dict[str, int]: + """ + Parse ``git blame --line-porcelain`` output into a year-to-line-count map. + + :param raw_output: The raw porcelain output. + :return: Dictionary mapping 4-digit year strings to line counts. + """ + distribution = defaultdict(int) + commit_to_year = {} + current_commit = None + + for line in raw_output.splitlines(): + if line.startswith("\t"): + if current_commit and current_commit in commit_to_year: + year = commit_to_year[current_commit] + distribution[year] += 1 + else: + parts = line.split(" ") + if len(parts[0]) in (40, 64) and all( + c in "0123456789abcdef" for c in parts[0].lower() + ): + current_commit = parts[0] + elif parts[0] == "author-time": + try: + timestamp = int(parts[1]) + year = datetime.fromtimestamp(timestamp, timezone.utc).strftime( + "%Y" + ) + commit_to_year[current_commit] = year + except (ValueError, IndexError): + pass + + return dict(distribution) + + +# Post-processing: oldest-fossil mode (for fossil discovery) +def find_oldest_fossil_in_blame( + raw_output: str, file_path: str, view_commit: str = "" +) -> dict: + """ + Find the oldest-authored line in a single file's blame output. + + :param raw_output: Raw ``--line-porcelain`` output. + :param file_path: Path of the blamed file (stored in the result). + :param view_commit: Git ref to store as ``view_commit`` in the result. + :return: A fossil dict for the oldest line found, or a blank fossil if no + lines could be blamed. + """ + fossil = _blank_fossil() + current_commit_data = {} + line_num = 0 + + for line in raw_output.splitlines(): + if line.startswith("\t"): + line_num += 1 + timestamp = current_commit_data.get("author-time") + content = line.lstrip("\t").strip() + if timestamp is not None and timestamp < fossil["timestamp"] and content: + fossil["timestamp"] = timestamp + fossil["file"] = file_path + fossil["content"] = content + fossil["year"] = datetime.fromtimestamp( + timestamp, timezone.utc + ).strftime("%Y") + fossil["commit"] = current_commit_data.get("commit", "")[:7] + fossil["view_commit"] = view_commit + fossil["line"] = line_num + else: + parts = line.split(" ") + if ( + parts + and len(parts[0]) in (40, 64) + and all(c in "0123456789abcdef" for c in parts[0].lower()) + ): + current_commit_data = {"commit": parts[0]} + elif line.startswith("author-time ") and len(parts) >= 2: + try: + current_commit_data["author-time"] = int(parts[1]) + except ValueError: + pass + + return fossil + + +# Parallel blame runner (internal) +def _blame_files_internal( + repo_path: str | Path, + files: list[str], + max_workers: int, + process_result, + total_files_hint: int | None = None, +) -> None: + """ + Blame files in parallel and call ``process_result(file, raw_output)`` for each. + + Logs 10 % progress steps so the user sees the script is making progress. + + :param repo_path: Path to the git repository. + :param files: List of relative file paths to blame. + :param max_workers: Maximum number of parallel blame processes. + :param process_result: Callback ``(file_path: str, raw_output: str) -> None``. + :param total_files_hint: For display purposes only; overrides the log count. + """ + total = total_files_hint or len(files) + completed = 0 + next_log_pct = 10 + + with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: + future_to_file = { + executor.submit(blame_single_file, repo_path, f): f for f in files + } + + for future in concurrent.futures.as_completed(future_to_file): + file_path = future_to_file[future] + raw_output = future.result() + if raw_output: + process_result(file_path, raw_output) + + completed += 1 + pct = completed / total * 100 + if pct >= next_log_pct: + logger.info(" Blame progress: %d/%d (%.0f%%)", completed, total, pct) + next_log_pct += 10 + + +# Public parallel-blame helpers +def blame_files_year_counts( + repo_path: str | Path, files: list[str], max_workers: int = 8 +) -> dict[str, int]: + """ + Blame a list of files in parallel and return an aggregated year-to-line-count map. + + :param repo_path: Path to the git repository. + :param files: List of relative file paths to blame. + :param max_workers: Maximum parallel blame processes (default 8). + :return: ``{year: line_count}`` aggregated across all files. + """ + logger.info(" Blaming %d files (%d workers)...", len(files), max_workers) + age_distribution: dict[str, int] = defaultdict(int) + + def _accumulate(file_path: str, raw_output: str) -> None: + for year, count in parse_blame_year_counts(raw_output).items(): + age_distribution[year] += count + + _blame_files_internal(repo_path, files, max_workers, _accumulate) + return dict(age_distribution) + + +def blame_files_oldest_fossil( + repo_path: str | Path, + files: list[str], + max_workers: int = 20, + view_commit: str = "", +) -> dict: + """ + Blame a list of files in parallel and return the single oldest fossil found. + + :param repo_path: Path to the git repository. + :param files: List of relative file paths to blame. + :param max_workers: Maximum parallel blame processes (default 20). + :param view_commit: Git ref to store as ``view_commit`` in the result. + :return: Fossil dict for the oldest line across all files, or a blank + fossil if no lines could be blamed. + """ + global_oldest = _blank_fossil() + + def _find(file_path: str, raw_output: str) -> None: + nonlocal global_oldest + fossil = find_oldest_fossil_in_blame(raw_output, file_path, view_commit) + if fossil["timestamp"] < global_oldest["timestamp"] and fossil["file"]: + global_oldest = fossil + + _blame_files_internal(repo_path, files, max_workers, _find) + return global_oldest diff --git a/scripts/_data_io.py b/scripts/_data_io.py new file mode 100644 index 0000000..541c434 --- /dev/null +++ b/scripts/_data_io.py @@ -0,0 +1,92 @@ +""" +Shared JSON I/O for Theseus data pipeline scripts. + +All repository data files share a common top-level structure: + +.. code-block:: json + + { + "snapshots": [ { "snapshot_date": "YYYY-MM", "composition": {"YYYY": count, ...} }, ... ], + "fossils": { "genesis": { ... }, "survivor": { ... } } + } + +The ``fossils`` object stores the two fossil types: + + **Genesis** (``fossils.genesis``) + The single oldest-authored line *ever* written in the repository. + Discovered by blaming only files added in each of the earliest commits + (sorted by author-time) and returning the line with the smallest + author-timestamp. + + **Survivor** (``fossils.survivor``) + The single oldest-authored line that *still exists* at the current HEAD. + Discovered by blaming every tracked file on the default branch and + returning the line with the smallest author-timestamp. + + Each fossil stores: + + ``timestamp`` + Unix-epoch author-time of the oldest line. + ``file`` + Relative file path. + ``content`` + The actual source line. + ``year`` + 4-digit year derived from ``timestamp``. + ``commit`` + First 7 characters of the commit hash. + ``view_commit`` + The git ref (commit hash or branch name) used to view this file. + ``line`` + Line number within the file. +""" + +import json +import logging +import os + +logger = logging.getLogger(__name__) + + +# TODO: Move away from OS to Pathlib +def load_snapshot_data(file_path: str) -> dict: + """ + Load snapshot data from a JSON file, normalising to ``{snapshots, fossils}``. + + Supports both the new dict schema (``{"snapshots": [...], "fossils": {...}}``) + and the legacy list schema (``[{...}, ...]``). + + :param file_path: Path to the JSON data file. + :return: Dictionary with ``snapshots`` (list) and ``fossils`` (dict) keys. + """ + if not os.path.exists(file_path): + return {"snapshots": [], "fossils": {}} + + try: + with open(file_path, "r", encoding="utf-8") as f: + data = json.load(f) + if isinstance(data, list): + return {"snapshots": data, "fossils": {}} + return data + except json.JSONDecodeError: + logger.warning("%s is corrupted, starting fresh.", file_path) + return {"snapshots": [], "fossils": {}} + + +# TODO: Move away from OS to Pathlib +def save_snapshot_data(file_path: str, snapshots: list[dict], fossils: dict) -> None: + """ + Atomically write snapshot data to a minified JSON file. + + Writes to a ``.tmp`` sibling first, then atomically replaces the target + via ``os.replace`` to prevent file corruption on crash. + + :param file_path: Destination path. + :param snapshots: List of snapshot objects. + :param fossils: Fossil dictionary (``genesis`` + ``survivor`` keys). + """ + tmp_path = file_path + ".tmp" + data = {"snapshots": snapshots, "fossils": fossils} + with open(tmp_path, "w", encoding="utf-8") as f: + json.dump(data, f, separators=(",", ":")) + os.replace(tmp_path, file_path) diff --git a/scripts/_utils.py b/scripts/_utils.py index 0dd75e7..2b72de1 100644 --- a/scripts/_utils.py +++ b/scripts/_utils.py @@ -89,7 +89,7 @@ def get_default_branch(repo_path: str | None = None) -> str: try: result = run_command(strategy, cwd=repo_path) branch = ( - result[len("origin/"):] if result.startswith("origin/") else result + result[len("origin/") :] if result.startswith("origin/") else result ) if branch: return branch @@ -109,6 +109,22 @@ def get_default_branch(repo_path: str | None = None) -> str: return "HEAD" +def get_tracked_files(repo_path: str | None = None) -> list[str]: + """ + Return a list of files that are tracked by git and exist on disk. + + :param repo_path: Path to the git repository (or ``None`` for CWD). + :return: List of relative file paths. + """ + files_output = run_command(["git", "ls-files"], cwd=repo_path) + resolved = str(repo_path) if repo_path else os.getcwd() + return [ + f + for f in files_output.splitlines() + if os.path.isfile(os.path.join(resolved, f)) + ] + + def remove_path(path: str) -> None: """ Remove a file or directory using OS-native fast deletion. @@ -166,6 +182,6 @@ def handle_remove_readonly(func, path, _exc_info): break except Exception: # noqa: BLE001 if attempt < 2: - time.sleep(2 ** attempt) + time.sleep(2**attempt) else: logger.warning("Failed to clean up %s after 3 attempts", path) diff --git a/scripts/add_fossils.py b/scripts/add_fossils.py index c59d5a4..5cee668 100644 --- a/scripts/add_fossils.py +++ b/scripts/add_fossils.py @@ -1,38 +1,74 @@ """ Fossil Finder — Backfill & Incremental Update Script ===================================================== -Manages two fossil types for each repo's data JSON without touching snapshot data: - - Genesis (Historical Fossil) — the oldest line **ever written** in this repo's - entire git history, found by blaming the very first commit(s). - - Survivor (Living Fossil) — the oldest line that is **still alive today**, - found by blaming all files at the current default-branch HEAD. +Manages two fossil types for each repository's data JSON without touching +snapshot data. + +Fossil data model +----------------- +A **fossil** is a single source-code line whose author-timestamp is the oldest +ever found in a given scope. Each fossil records: + +``timestamp`` + Unix-epoch author-time. +``file`` + Relative file path. +``content`` + The actual source line text. +``year`` + 4-digit year derived from ``timestamp``. +``commit`` + First 7 characters of the commit that last modified this line. +``view_commit`` + The git ref (commit hash or branch name) at which the file is checked out. +``line`` + 1-based line number within the file. + +Two concrete fossil types are discovered by this script: + + **Genesis** (Historical Fossil) + The single oldest-authored line *ever* to exist in the repository. + Found by scanning the earliest commits sorted by author-time, blaming + only the files that were *added* in each commit, and returning the line + with the smallest author-timestamp across all scanned commits. + An early-exit heuristic stops after ``stale_limit`` consecutive commits + that fail to improve the oldest-yet result. + + **Survivor** (Living Fossil) + The single oldest-authored line that *still exists* at the current HEAD. + Found by blaming every tracked file on the default branch and returning + the line with the smallest author-timestamp. Modes ----- - (no flags) Full backfill: recompute both Genesis and Survivor for all repos. - --update-survivor Incremental: only refresh the Survivor fossil for each repo, - and only write to disk if the file:line has actually changed. - This is the mode used by the GitHub Actions workflow. + (no flags) Full backfill: recompute both Genesis and Survivor + for all repos. + --update-survivor Incremental: only refresh the Survivor fossil. + Skips writing to disk if the file:line:commit has not + changed. Used by the GitHub Actions workflow. --only REPO Limit processing to a single named repo. """ import argparse -import concurrent.futures -import json import logging import os import sys -from datetime import datetime, timezone from pathlib import Path -# Ensure sibling imports from _utils work in all invocation contexts +# Ensure sibling imports work in all invocation contexts _SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__)) if _SCRIPTS_DIR not in sys.path: sys.path.insert(0, _SCRIPTS_DIR) -from _utils import get_default_branch, load_config, remove_path, run_command +from _blame import _blank_fossil, blame_files_oldest_fossil +from _data_io import load_snapshot_data, save_snapshot_data +from _utils import ( + get_default_branch, + get_tracked_files, + load_config, + remove_path, + run_command, +) logger = logging.getLogger(__name__) @@ -42,95 +78,20 @@ # --------------------------------------------------------------------------- -def _blank_fossil() -> dict: - return { - "timestamp": 2_147_483_647, - "file": "", - "content": "", - "year": "", - "commit": "", - "view_commit": "", - "line": 0, - } - - -def _blame_file(repo_path: str | Path, file_path: str, view_commit: str = "") -> dict: - """Run git blame --line-porcelain on a single file and return the oldest fossil found.""" - try: - blame_output = run_command( - ["git", "blame", "--line-porcelain", file_path], - cwd=repo_path, - ) - except RuntimeError: - return _blank_fossil() - - fossil = _blank_fossil() - current_commit_data = {} - line_num = 0 - - for line in blame_output.splitlines(): - if line.startswith("\t"): - line_num += 1 - timestamp = current_commit_data.get("author-time") - content = line.lstrip("\t").strip() - if timestamp is not None and timestamp < fossil["timestamp"] and content: - fossil["timestamp"] = timestamp - fossil["file"] = file_path - fossil["content"] = content - fossil["year"] = datetime.fromtimestamp( - timestamp, timezone.utc - ).strftime("%Y") - fossil["commit"] = current_commit_data.get("commit", "")[:7] - fossil["view_commit"] = ( - view_commit # the checkout commit — file is guaranteed to exist here - ) - fossil["line"] = line_num - else: - parts = line.split(" ") - if ( - parts - and len(parts[0]) in (40, 64) - and all(c in "0123456789abcdef" for c in parts[0].lower()) - ): - current_commit_data = {"commit": parts[0]} - elif line.startswith("author-time ") and len(parts) >= 2: - try: - current_commit_data["author-time"] = int(parts[1]) - except ValueError: - pass - - return fossil - - -def _blame_files_parallel( - repo_path: str | Path, - files: list[str], - view_commit: str = "", - max_workers: int = 20, -) -> dict: - """Blame a list of files in parallel and return the single oldest fossil found.""" - global_oldest = _blank_fossil() - - with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: - futures = { - executor.submit(_blame_file, repo_path, f, view_commit): f for f in files - } - for future in concurrent.futures.as_completed(futures): - result = future.result() - if result["timestamp"] < global_oldest["timestamp"] and result["file"]: - global_oldest = result - - return global_oldest +def _fossil_identity(fossil: dict) -> tuple: + """ + Return a hashable key identifying which line this fossil refers to. + Uses ``(file, blame_commit)`` — the authoring commit uniquely identifies + the content. Line numbers are intentionally excluded: a line that stays + in the same file but shifts position (due to insertions/deletions above it) + is still the same fossil. Only a change in file or authoring commit + (meaning the line was actually rewritten) counts as a different fossil. -def _get_tracked_files(repo_path: str | Path) -> list[str]: - """Return a list of files that are tracked by git and exist on disk.""" - files_output = run_command(["git", "ls-files"], cwd=repo_path) - return [ - f - for f in files_output.splitlines() - if os.path.isfile(os.path.join(str(repo_path), f)) - ] + :param fossil: A fossil dict. + :return: ``(file, commit)`` tuple. + """ + return (fossil.get("file", ""), fossil.get("commit", "")) def _get_files_added_in_commit(repo_path: str | Path, commit_hash: str) -> list[str]: @@ -138,34 +99,14 @@ def _get_files_added_in_commit(repo_path: str | Path, commit_hash: str) -> list[ Return files that were *added* (not modified, not renamed) by this commit. Uses ``git diff-tree --diff-filter=A`` which only lists new files - introduced in the commit, compared to its parent(s). For the root - commit (no parent) the command fails so we fall back to ``git ls-files``. - - Complexity - ---------- - Before (``_get_tracked_files``): - O(all_tracked_files) per commit — every file at that checkout is - blamed, even files that were added centuries earlier. - - After (``_get_files_added_in_commit``): - O(added_files_only) per commit — only files that first appear in - this commit are blamed. Files from older commits were already - handled in earlier iterations of the genesis loop, so re-blaming - them is redundant. - - Why this is safe - ---------------- - ``git blame --line-porcelain`` traces each line back to the commit - that *last modified* that line. If a file was added at commit K and - never touched again, blaming it at K or at any later commit returns - the same author-time == K. If a file was added at K and modified at - K+2, the modified lines will show author-time == K+2, which is never - older than K. Therefore the oldest line of any file is found by - blaming that file exactly once — at the commit where it first - appeared in the tree. + introduced in the commit, compared to its parent(s). For the root commit + (no parent) the command fails so we fall back to ``git ls-files``. + + :param repo_path: Path to the git repository. + :param commit_hash: The commit to inspect. + :return: List of relative file paths added in this commit. """ try: - # For non-root commits — compare against parent(s) files_output = run_command( [ "git", @@ -176,27 +117,14 @@ def _get_files_added_in_commit(repo_path: str | Path, commit_hash: str) -> list[ "--name-only", commit_hash, ], - cwd=repo_path, + cwd=str(repo_path), ) return files_output.splitlines() if files_output else [] except RuntimeError: - # Root commit has no parent — all tracked files are "new" - files_output = run_command(["git", "ls-files"], cwd=repo_path) + files_output = run_command(["git", "ls-files"], cwd=str(repo_path)) return files_output.splitlines() -def _fossil_identity(fossil: dict) -> tuple: - """Return a hashable key that identifies which line this fossil refers to. - - Uses (file, blame_commit) — the authoring commit uniquely identifies the - content. Line numbers are intentionally excluded: a line that stays in - the same file but shifts position (due to insertions/deletions above it) - is still the same fossil. Only a change in file or authoring commit - (meaning the line was actually rewritten) counts as a different fossil. - """ - return (fossil.get("file", ""), fossil.get("commit", "")) - - # --------------------------------------------------------------------------- # Genesis — Historical Fossil # --------------------------------------------------------------------------- @@ -212,43 +140,24 @@ def get_genesis_fossil( Strategy -------- - Sort ALL commits by author-time (not committer-time), then scan the oldest - ``genesis_depth`` commits. This correctly handles repos migrated from - SVN/Mercurial where old authored lines may appear in commits with much - later committer timestamps. - - Early-exit heuristic - ~~~~~~~~~~~~~~~~~~~~ - Once a fossil has been found, if ``stale_limit`` consecutive older commits - fail to improve it (no line with a smaller author-time), the scan stops. - The assumption is that if a long stretch of early commits doesn't contain - anything older than what we already have, no older line exists anywhere. - - Why this is safe - ~~~~~~~~~~~~~~~~ - The very first commit (lowest author-time) is always scanned first. If the - oldest code was added in one of the earliest commits, it will be found - immediately. The stale-limit window (default 5) gives enough room for - repos where the first commit only contained a README and the real code was - added in a slightly later commit, while stopping *well* before 50 in the - common case. - - Before (hardcoded 50) - Worst case: 50 blame passes over the full file tree at each commit. - Even with the ``_get_files_added_in_commit`` optimisation, scanning 50 - commits is unnecessary for most repos. - - After (adaptive stale-limit + hard cap) - Most repos stop after 5--10 commits. Edge cases (e.g. a repo with - many distinct old commits that each add new source files) still have - the hard safety cap of ``genesis_depth=50``. + Sort ALL commits by author-time, then scan the oldest ``genesis_depth`` + commits. Within each commit, blame only files that were *added* in that + commit (files from older commits have already been blamed in previous + iterations). An early-exit heuristic stops after ``stale_limit`` + consecutive commits that fail to improve the oldest-yet result. + + :param repo_path: Path to the git repository. + :param genesis_depth: Maximum number of oldest commits to scan (default 50). + :param stale_limit: Stop after this many consecutive commits with no + improvement (default 5). + :return: A fossil dict for the oldest line ever found, or a blank fossil + if no lines could be blamed. """ logger.info("Computing Genesis (Historical) fossil...") - # Get every commit with its author-time so we can sort by actual authorship date log_output = run_command( ["git", "log", "--all", "--pretty=format:%H %at"], - cwd=repo_path, + cwd=str(repo_path), ) commit_pairs: list[tuple[str, int]] = [] @@ -264,7 +173,6 @@ def get_genesis_fossil( logger.warning("No commits found in repo.") return _blank_fossil() - # Sort by author-time ascending → oldest authored commits first commit_pairs.sort(key=lambda x: x[1]) oldest_commits = [(c[0], c[1]) for c in commit_pairs[:genesis_depth]] @@ -280,15 +188,11 @@ def get_genesis_fossil( author_ts, ) try: - run_command(["git", "checkout", "--force", commit], cwd=repo_path) + run_command(["git", "checkout", "--force", commit], cwd=str(repo_path)) except RuntimeError as e: logger.warning(" Could not checkout %s: %s", commit[:7], e) continue - # Only blame files that were *added* in this commit, not every - # tracked file. Files added in older commits have already been - # blamed in previous loop iterations — re-blaming them is wasted work. - # See _get_files_added_in_commit for the full reasoning. files = _get_files_added_in_commit(repo_path, commit) if not files: stale_count += 1 @@ -302,7 +206,7 @@ def get_genesis_fossil( break continue - fossil = _blame_files_parallel(repo_path, files, view_commit=commit) + fossil = blame_files_oldest_fossil(repo_path, files, view_commit=commit) if fossil["file"] and fossil["timestamp"] < global_oldest["timestamp"]: global_oldest = fossil @@ -332,34 +236,36 @@ def get_survivor_fossil(repo_path: str | Path) -> dict: Living Fossil: the oldest line that is **still alive** in the codebase today. Strategy: checkout the current default branch HEAD, then blame every file. + + :param repo_path: Path to the git repository. + :return: A fossil dict for the oldest line still present, or a blank + fossil if no lines could be blamed. """ logger.info("Computing Survivor (Living) fossil...") - default_branch = get_default_branch(repo_path) + default_branch = get_default_branch(str(repo_path)) logger.info(" Checking out default branch: %s", default_branch) try: run_command( ["git", "checkout", "-B", default_branch, f"origin/{default_branch}"], - cwd=repo_path, + cwd=str(repo_path), ) except RuntimeError: - # Detached HEAD fallback run_command( - ["git", "checkout", "--force", f"origin/{default_branch}"], cwd=repo_path + ["git", "checkout", "--force", f"origin/{default_branch}"], + cwd=str(repo_path), ) - # For the Living Fossil, link to the branch name directly (not a frozen commit hash). - # This means the GitHub URL points to the current, living file — which is what "living" means. - # The file is guaranteed to exist on this branch since we ls-files it below. view_commit = default_branch - files = _get_tracked_files(repo_path) - if not files: + tracked_files = get_tracked_files(str(repo_path)) + if not tracked_files: logger.warning("No tracked files found at HEAD.") return _blank_fossil() - return _blame_files_parallel(repo_path, files, view_commit=view_commit) + logger.info(" Blaming %d tracked files...", len(tracked_files)) + return blame_files_oldest_fossil(repo_path, tracked_files, view_commit=view_commit) # --------------------------------------------------------------------------- @@ -369,8 +275,15 @@ def get_survivor_fossil(repo_path: str | Path) -> dict: def backfill_fossils(data_dir: str, repo_urls: dict[str, str]) -> bool: """ - For every repo JSON in data_dir, recompute both fossils without touching snapshots. - Always forces a fresh recompute of both genesis and survivor. + For every repo JSON in ``data_dir``, recompute both fossils without + touching snapshot data. + + Always forces a fresh recompute of both genesis and survivor for every + repository. + + :param data_dir: Path to the ``data/`` directory. + :param repo_urls: ``{repo_name: clone_url}`` mapping. + :return: ``True`` if any errors occurred, ``False`` otherwise. """ data_path = Path(data_dir) temp_dir = Path("./temp_fossil_repos") @@ -383,27 +296,18 @@ def backfill_fossils(data_dir: str, repo_urls: dict[str, str]) -> bool: repo_name = json_file.stem.replace("_data", "") repo_url = repo_urls.get(repo_name) - if not repo_url: logger.warning("No URL found for '%s', skipping.", repo_name) continue logger.info("━━━ Processing: %s ━━━", repo_name) - # 1. Load existing data (snapshots untouched) - with open(json_file, "r", encoding="utf-8") as f: - raw_data = json.load(f) - - if isinstance(raw_data, list): - snapshots = raw_data - else: - snapshots = raw_data.get("snapshots", []) - + data = load_snapshot_data(str(json_file)) + snapshots = data["snapshots"] if not snapshots: logger.warning(" No snapshots found in %s, skipping.", json_file.name) continue - # 2. Clone the repo if we don't have it locally already local_repo = temp_dir / repo_name if not local_repo.exists(): logger.info(" Cloning %s...", repo_url) @@ -411,27 +315,23 @@ def backfill_fossils(data_dir: str, repo_urls: dict[str, str]) -> bool: else: logger.info(" Repo already cloned — fetching latest...") try: - run_command(["git", "fetch", "--all"], cwd=local_repo) + run_command(["git", "fetch", "--all"], cwd=str(local_repo)) except RuntimeError as e: logger.warning(" Fetch failed (continuing with local): %s", e) - # 3. Compute fossils try: genesis = get_genesis_fossil(local_repo) survivor = get_survivor_fossil(local_repo) - fossils = {"genesis": genesis, "survivor": survivor} - # Validate — warn if something looks wrong if not genesis.get("file"): logger.warning(" ⚠ Genesis fossil is empty for %s", repo_name) if not survivor.get("file"): logger.warning(" ⚠ Survivor fossil is empty for %s", repo_name) if genesis.get("commit") == survivor.get("commit") and genesis.get("file"): logger.warning( - "⚠ Genesis and Survivor share the same commit (%s) " - "this may indicate the repo was never fully rewritten, which is valid, " - "or there may be a data issue.", + " ⚠ Genesis and Survivor share the same commit (%s) " + "— may indicate the repo was never fully rewritten.", genesis["commit"], ) @@ -450,23 +350,13 @@ def backfill_fossils(data_dir: str, repo_urls: dict[str, str]) -> bool: survivor.get("commit"), ) - # 4. Write back — snapshots are preserved as-is - tmp_file = json_file.with_suffix(f"{json_file.suffix}.tmp") - with open(tmp_file, "w", encoding="utf-8") as f: - json.dump( - {"snapshots": snapshots, "fossils": fossils}, - f, - separators=(",", ":"), - ) - os.replace(tmp_file, json_file) - + save_snapshot_data(str(json_file), snapshots, fossils) logger.info(" ✓ Successfully wrote fossils for %s", repo_name) - except Exception as e: # pylint: disable=broad-exception-caught + except Exception as e: # noqa: BLE001 logger.error(" ✗ Error computing fossils for %s: %s", repo_name, e) had_failures = True - # Clean up temp repos if temp_dir.exists(): remove_path(str(temp_dir)) @@ -479,15 +369,16 @@ def backfill_fossils(data_dir: str, repo_urls: dict[str, str]) -> bool: def update_survivor_fossils(data_dir: str, repo_urls: dict[str, str]) -> bool: - # pylint: disable=too-many-locals,too-many-branches,too-many-statements """ Refresh only the Survivor (Living) fossil for each repo. - Skips writing to disk if the fossil's file:line:commit hasn't changed. - This is designed to be fast and run on every monthly cron tick so that - the living fossil stays current even when no new snapshots are being added. + Skips writing to disk if the fossil's ``file:line:commit`` has not changed. + Designed to run on every monthly cron tick so the living fossil stays + current even when no new snapshots are being added. - Returns the number of repos where the survivor was updated. + :param data_dir: Path to the ``data/`` directory. + :param repo_urls: ``{repo_name: clone_url}`` mapping. + :return: ``True`` if any errors occurred, ``False`` otherwise. """ data_path = Path(data_dir) temp_dir = Path("./temp_fossil_repos") @@ -502,23 +393,15 @@ def update_survivor_fossils(data_dir: str, repo_urls: dict[str, str]) -> bool: repo_name = json_file.stem.replace("_data", "") repo_url = repo_urls.get(repo_name) - if not repo_url: logger.warning("No URL found for '%s', skipping.", repo_name) continue logger.info("━━━ Checking survivor for: %s ━━━", repo_name) - # 1. Load existing data - with open(json_file, "r", encoding="utf-8") as f: - raw_data = json.load(f) - - if isinstance(raw_data, list): - snapshots = raw_data - existing_fossils = {} - else: - snapshots = raw_data.get("snapshots", []) - existing_fossils = raw_data.get("fossils", {}) + data = load_snapshot_data(str(json_file)) + snapshots = data["snapshots"] + existing_fossils = data.get("fossils", {}) if not snapshots: logger.warning(" No snapshots found in %s, skipping.", json_file.name) @@ -526,7 +409,6 @@ def update_survivor_fossils(data_dir: str, repo_urls: dict[str, str]) -> bool: existing_survivor = existing_fossils.get("survivor", {}) - # 2. Clone or fetch the repo local_repo = temp_dir / repo_name if not local_repo.exists(): logger.info(" Cloning %s...", repo_url) @@ -534,11 +416,10 @@ def update_survivor_fossils(data_dir: str, repo_urls: dict[str, str]) -> bool: else: logger.info(" Fetching latest...") try: - run_command(["git", "fetch", "--all"], cwd=local_repo) + run_command(["git", "fetch", "--all"], cwd=str(local_repo)) except RuntimeError as e: logger.warning(" Fetch failed (continuing with local): %s", e) - # 3. Compute new survivor try: new_survivor = get_survivor_fossil(local_repo) @@ -557,7 +438,6 @@ def update_survivor_fossils(data_dir: str, repo_urls: dict[str, str]) -> bool: ) continue - # Something changed — log the diff clearly logger.info(" ↻ Survivor updated for %s:", repo_name) logger.info( " OLD: %s:%s @ %s", @@ -572,25 +452,15 @@ def update_survivor_fossils(data_dir: str, repo_urls: dict[str, str]) -> bool: new_survivor.get("commit"), ) - # 4. Write back — genesis is preserved, only survivor is replaced updated_fossils = {**existing_fossils, "survivor": new_survivor} - tmp_file = json_file.with_suffix(f"{json_file.suffix}.tmp") - with open(tmp_file, "w", encoding="utf-8") as f: - json.dump( - {"snapshots": snapshots, "fossils": updated_fossils}, - f, - separators=(",", ":"), - ) - os.replace(tmp_file, json_file) - + save_snapshot_data(str(json_file), snapshots, updated_fossils) logger.info(" ✓ Wrote updated survivor for %s", repo_name) updated_count += 1 - except Exception as e: # pylint: disable=broad-exception-caught + except Exception as e: # noqa: BLE001 logger.error(" ✗ Error updating survivor for %s: %s", repo_name, e) had_failures = True - # Clean up temp repos if temp_dir.exists(): remove_path(str(temp_dir)) @@ -604,21 +474,23 @@ def update_survivor_fossils(data_dir: str, repo_urls: dict[str, str]) -> bool: def main() -> None: - # pylint: disable=duplicate-code """ - Main entry point for fossil backfill and incremental survivor checking. + Entry point for fossil backfill and incremental survivor checking. + + CLI flags + --------- + --only REPO Process only a single repository (by config name). + --update-survivor Incremental mode: refresh only the Survivor fossil. """ logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") config = load_config() data_dir = config.get("dataDir", "./data") - # Build dynamically from config: name -> github URL repo_urls = { repo["name"]: f"https://github.com/{repo['repo']}.git" for repo in config.get("repositories", []) if "name" in repo and "repo" in repo } - if not repo_urls: logger.error("No valid repositories found in configuration.") sys.exit(1) @@ -634,11 +506,8 @@ def main() -> None: parser.add_argument( "--update-survivor", action="store_true", - help=( - "Incremental mode: only refresh the Survivor (Living) fossil. " - "Skips writing if file:line:commit hasn't changed. " - "Genesis is left untouched. Used by GitHub Actions." - ), + help="Incremental mode: only refresh the Survivor fossil. Skips write " + "if unchanged. Genesis is left untouched.", ) args = parser.parse_args() diff --git a/scripts/analyse_repository.py b/scripts/analyse_repository.py index f456e55..70d648b 100644 --- a/scripts/analyse_repository.py +++ b/scripts/analyse_repository.py @@ -1,253 +1,177 @@ """ -This script is responsible for doing the heavy lifting. Processes repository snapshots incrementally to track code age distribution. -Uses quarterly resolution for historical data (pre-2025) and monthly for recent data (2025+). -Fossil computation is handled separately by add_fossils.py. +This script is responsible for the **snapshot generation** step of the Theseus +data pipeline. It clones (or fetches) a git repository, walks its commit +history at quarterly resolution (pre-2025) / monthly resolution (2025+), runs +``git blame --line-porcelain`` on all tracked files at each snapshot commit, +and aggregates the results into year-to-line-count distributions. + +The output JSON has the standard ``{snapshots, fossils}`` shape where +``fossils`` is left untouched (preserving any previously computed fossil data). +Fossil computation is handled separately by ``add_fossils.py``. + +Fossil data model +----------------- +Scripts in this pipeline use two fossil types: + + **Genesis** — the single oldest-authored line ever written in the repository. + **Survivor** — the single oldest-authored line still alive at current HEAD. + +Each fossil stores ``{timestamp, file, content, year, commit, view_commit, line}``. +See ``_blame.py`` for the full data-model definition and the algorithms used +to discover each fossil type. """ import argparse -import concurrent.futures -import json import logging import os import sys import time from collections import defaultdict -from datetime import datetime, timezone from itertools import groupby -# Ensure sibling imports from _utils work in all invocation contexts +# Ensure sibling imports work in all invocation contexts _SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__)) if _SCRIPTS_DIR not in sys.path: sys.path.insert(0, _SCRIPTS_DIR) -from _utils import run_command, get_default_branch, load_config, remove_path +from _blame import blame_files_year_counts +from _data_io import load_snapshot_data, save_snapshot_data +from _utils import get_default_branch, get_tracked_files, load_config, run_command logger = logging.getLogger(__name__) def clone_repository(repo_slug: str, clone_dir: str) -> None: """ - Dynamically clone a GitHub repository given its owner/name slug. + Clone a GitHub repository into a local directory. - :param repo_slug: The GitHub repository identifier (e.g., 'facebook/react'). - :param clone_dir: The local directory where the repository should be cloned. + :param repo_slug: GitHub ``owner/name`` slug (e.g. ``'facebook/react'``). + :param clone_dir: Local path to clone into. """ logger.info("Cloning %s into %s...", repo_slug, clone_dir) repo_url = f"https://github.com/{repo_slug}.git" run_command(["git", "clone", repo_url, clone_dir]) -def get_snapshots(repo_path: str) -> list[tuple[str, str]]: +def get_snapshot_periods(repo_path: str) -> list[tuple[str, str]]: """ - Identify commits for snapshots: quarterly for pre-2025, monthly for 2025+. + Identify (period, commit) snapshots from the repository's log. - Quarterly uses the last month of each quarter: 03, 06, 09, 12. + Resolution is quarterly (last month of each quarter: 03, 06, 09, 12) for + pre-2025 history and monthly for 2025+. :param repo_path: Path to the git repository. - :return: A list of tuples, each containing a 'YYYY-MM' period and the corresponding commit hash. + :return: List of ``(YYYY-MM, commit_hash)`` tuples sorted chronologically. """ log_output = run_command( cmd=["git", "log", "--pretty=format:%H|%cI"], cwd=repo_path ) - snapshots: dict[str, str] = {} + periods: dict[str, str] = {} for line in log_output.splitlines(): if not line: continue commit_hash, commit_date = line.split("|") period = commit_date[:7] - # Keep the first (newest) commit per period - if period not in snapshots: - snapshots[period] = commit_hash + if period not in periods: + periods[period] = commit_hash quarterly_months = {"03", "06", "09", "12"} - filtered_snapshots: dict[str, str] = {} + filtered: dict[str, str] = {} - for period, commit_hash in snapshots.items(): + for period, commit_hash in periods.items(): year = period[:4] month = period[5:7] - if int(year) >= 2025: - filtered_snapshots[period] = commit_hash + filtered[period] = commit_hash elif month in quarterly_months: - filtered_snapshots[period] = commit_hash + filtered[period] = commit_hash - return sorted(filtered_snapshots.items(), key=lambda x: x[0]) + return sorted(filtered.items(), key=lambda x: x[0]) -def _parse_blame_output(blame_output: str) -> dict[str, int]: +def _resolve_worker_count() -> int: """ - Parse git blame --line-porcelain output, returning a year -> line count mapping. + Determine the number of parallel blame workers. - :param blame_output: The raw output from git blame --line-porcelain - :return: A dictionary mapping years to the number of lines changed in that year - """ - file_distribution = defaultdict(int) - commit_to_year = {} - current_commit = None - - for line in blame_output.splitlines(): - if line.startswith("\t"): - if current_commit and current_commit in commit_to_year: - year = commit_to_year[current_commit] - file_distribution[year] += 1 - else: - parts = line.split(" ") - if len(parts[0]) in (40, 64) and all(c in "0123456789abcdef" for c in parts[0].lower()): - current_commit = parts[0] - elif parts[0] == "author-time": - try: - timestamp = int(parts[1]) - year = datetime.fromtimestamp(timestamp, timezone.utc).strftime( - "%Y" - ) - commit_to_year[current_commit] = year - except (ValueError, IndexError): - pass - - return dict(file_distribution) - - -def _blame_single_file(repo_path: str, file: str) -> dict[str, int]: - """ - Worker function to run git blame on a single file. - Designed to be run concurrently in a ThreadPool. - """ - try: - blame_output = run_command( - ["git", "blame", "--line-porcelain", file], cwd=repo_path - ) - return _parse_blame_output(blame_output) - except RuntimeError: - return {} - - -def analyze_snapshots(repo_path: str, commit_hash: str) -> dict[str, int]: - """ - Analyze the snapshots collected from the repository. + Default is ``min(8, cpu_count * 2)``. Override via ``BLAME_WORKERS`` + environment variable (clamped 1-100). - :param repo_path: Path to the repository - :param commit_hash: Hash of the commit to analyze - :return: Dictionary mapping birth year to line count + :return: Worker count (int). """ - run_command(["git", "checkout", commit_hash], cwd=repo_path) - files_output = run_command(["git", "ls-files"], cwd=repo_path) - files = files_output.splitlines() - - age_distribution = defaultdict(int) - - valid_files = [f for f in files if os.path.isfile(os.path.join(repo_path, f))] - - # Safe BLAME_WORKERS parsing with fallback. - # Default caps at 8 to avoid I/O contention on HDDs (git blame is - # I/O-bound, not CPU-bound, so the CPU-count multiplier doesn't apply). - # Override via BLAME_WORKERS env var (clamped 1-100). max_workers = min(8, (os.cpu_count() or 1) * 2) try: if "BLAME_WORKERS" in os.environ: max_workers = max(1, min(int(os.environ["BLAME_WORKERS"]), 100)) except ValueError: pass + return max_workers - logger.info(" Blaming %d valid files (%d workers)...", len(valid_files), max_workers) - total_files = len(valid_files) - completed = 0 - next_log_pct = 10 +def analyze_single_snapshot(repo_path: str, commit_hash: str) -> dict[str, int]: + """ + Analyse a single snapshot commit and return its year-to-line-count distribution. - with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: - future_to_file = { - executor.submit(_blame_single_file, repo_path, file): file - for file in valid_files - } + Checks out the commit, collects all tracked files, and runs parallel + ``git blame`` across them to determine how many lines were authored in + each year. - for future in concurrent.futures.as_completed(future_to_file): - file_dist = future.result() - for year, count in file_dist.items(): - age_distribution[year] += count + :param repo_path: Path to the git repository. + :param commit_hash: The commit (tag, branch, or hash) to analyse. + :return: ``{year: line_count}`` for this snapshot. + """ + run_command(["git", "checkout", commit_hash], cwd=repo_path) + tracked_files = get_tracked_files(repo_path) + age_distribution: dict[str, int] = defaultdict(int) - completed += 1 - pct = completed / total_files * 100 - if pct >= next_log_pct: - logger.info(" Blame progress: %d/%d (%.0f%%)", completed, total_files, pct) - next_log_pct += 10 + max_workers = _resolve_worker_count() + distribution = blame_files_year_counts(repo_path, tracked_files, max_workers) + for year, count in distribution.items(): + age_distribution[year] += count return dict(age_distribution) -def load_existing_state(json_fname: str) -> dict: - """ - Load the existing historical data supporting both old list and new object schemas. - - :param json_fname: Path to the existing JSON file containing the historical data. - :return: A dictionary with 'snapshots' and 'fossils'. - """ - if os.path.exists(json_fname): - try: - with open(json_fname, "r", encoding="utf-8") as f: - data = json.load(f) - if isinstance(data, list): - return {"snapshots": data, "fossils": {}} - return data - except json.JSONDecodeError: - logger.warning("%s is corrupted, starting fresh.", json_fname) - return {"snapshots": [], "fossils": {}} - return {"snapshots": [], "fossils": {}} - - -def _atomic_write_json( - json_path: str, snapshots: list[dict], fossils: dict | None = None -) -> None: - """Write JSON data atomically and minified to prevent corruption and save space.""" - tmp_path = json_path + ".tmp" - data = {"snapshots": snapshots, "fossils": fossils or {}} - with open(tmp_path, "w", encoding="utf-8") as f: - json.dump(data, f, separators=(",", ":")) - os.replace(tmp_path, json_path) - - def _filter_snapshots( - all_snapshots: list[tuple[str, str]], + all_periods: list[tuple[str, str]], processed_periods: set[str], reprocess: str | None = None, ) -> list[tuple[str, str]]: """ - Filter a list of (period, commit) snapshots down to unprocessed entries. + Filter (period, commit) pairs down to those that need processing. - When *reprocess* is provided (``YYYY-MM``), that specific period is - included regardless of whether it exists in *processed_periods*. + When *reprocess* is provided, that specific period is included even if it + was already processed. - :param all_snapshots: Full list of (period, commit) tuples. - :param processed_periods: Set of period strings that have already been processed. - :param reprocess: Optional period to re-run (e.g. ``"2023-06"``). + :param all_periods: Full list of (period, commit) tuples. + :param processed_periods: Set of period strings already on disk. + :param reprocess: Optional ``YYYY-MM`` period to force re-processing. :return: List of (period, commit) tuples that need processing. """ result: list[tuple[str, str]] = [] - for period, commit in all_snapshots: + for period, commit in all_periods: if period not in processed_periods or (reprocess and period == reprocess): result.append((period, commit)) return result -def process_repository(repo_slug: str, data_dir: str, reprocess: str | None = None) -> None: - # pylint: disable=too-many-locals,too-many-branches,too-many-statements +def process_repository( + repo_slug: str, data_dir: str, reprocess: str | None = None +) -> None: """ - Orchestrate the extraction of Ship of Theseus code persistence data - using an incremental load strategy by just processing the delta. + Process a single repository end-to-end. - Processes year-by-year and writes to disk after each year completes - to prevent data loss on crash. + Clones or updates the repo, then processes each new snapshot year-by-year, + writing intermediate results to disk after each year to prevent data loss + on crash. Existing fossil data is preserved untouched. - Fossil data is NOT touched here — that is handled by add_fossils.py. - - :param repo_slug: The GitHub repository identifier (e.g., 'facebook/react'). - :param data_dir: Path where the resulting JSON data will be saved. + :param repo_slug: GitHub ``owner/name`` slug. + :param data_dir: Path to the ``data/`` output directory. + :param reprocess: Optional ``YYYY-MM`` period to force re-processing. """ repo_name = repo_slug.split("/")[-1] - # Use the full slug (org/repo) in the temp dir name to avoid collisions - # when two different orgs have repos with the same name. temp_repo_path = f"./temp_workdir_{repo_slug.replace('/', '__')}" output_json_path = os.path.join(data_dir, f"{repo_name}_data.json") @@ -271,23 +195,23 @@ def process_repository(repo_slug: str, data_dir: str, reprocess: str | None = No ) run_command(["git", "pull"], cwd=temp_repo_path) - state = load_existing_state(output_json_path) + state = load_snapshot_data(output_json_path) historical_snapshots = state["snapshots"] - # Preserve any existing fossil data — do not touch it existing_fossils = state.get("fossils", {}) processed_periods = set(item["snapshot_date"] for item in historical_snapshots) - all_snapshots = get_snapshots(temp_repo_path) - new_snapshots = _filter_snapshots(all_snapshots, processed_periods, reprocess) + all_periods = get_snapshot_periods(temp_repo_path) + new_snapshots = _filter_snapshots(all_periods, processed_periods, reprocess) if not new_snapshots: logger.info( - "[%s] No new periods to process. Data is already up to date!", repo_name + "[%s] No new periods to process. Data is already up to date!", + repo_name, ) return logger.info( - "[%s] Processing %d new snapshots with hybrid resolution (quarterly pre-2025, monthly 2025+)", + "[%s] Processing %d new snapshots (quarterly pre-2025, monthly 2025+)", repo_name, len(new_snapshots), ) @@ -309,7 +233,7 @@ def process_repository(repo_slug: str, data_dir: str, reprocess: str | None = No for idx, (period, commit) in enumerate(year_snapshots_list, 1): logger.info( - "[%s] [%s] Processing %s (%d/%d) - Commit: %s", + "[%s] [%s] Processing %s (%d/%d) — Commit: %s", repo_name, year, period, @@ -319,7 +243,7 @@ def process_repository(repo_slug: str, data_dir: str, reprocess: str | None = No ) snapshot_start = time.perf_counter() - distribution = analyze_snapshots(temp_repo_path, commit) + distribution = analyze_single_snapshot(temp_repo_path, commit) snapshot_elapsed = time.perf_counter() - snapshot_start logger.info( @@ -344,11 +268,10 @@ def process_repository(repo_slug: str, data_dir: str, reprocess: str | None = No final_snapshots = historical_snapshots + total_new_data final_snapshots.sort(key=lambda x: x["snapshot_date"]) - # Write snapshot data, preserving existing fossil data untouched - _atomic_write_json(output_json_path, final_snapshots, existing_fossils) + save_snapshot_data(output_json_path, final_snapshots, existing_fossils) logger.info( - "[%s] Completed year %s in %.2f seconds. Wrote %d snapshots to disk.", + "[%s] Completed year %s in %.2f seconds. Wrote %d snapshots.", repo_name, year, year_elapsed, @@ -356,19 +279,20 @@ def process_repository(repo_slug: str, data_dir: str, reprocess: str | None = No ) finally: + from _utils import remove_path + remove_path(temp_repo_path) def main() -> None: """ - Main entry point. Loads configuration, creates output directory, - and runs the repository analysis pipeline for all specified targets. + Entry point for the snapshot-analysis pipeline. CLI flags --------- - --repo NAME Process only the given repository (by config name). - --reprocess YYYY-MM - Re-process a specific snapshot period even if it already exists in the data. + --repo NAME Process only the given repository (by config name). + --reprocess YYYY-MM Re-process a specific snapshot period even if it already + exists on disk. """ parser = argparse.ArgumentParser( description="Analyse repository git history for the Ship of Theseus pipeline." @@ -394,10 +318,9 @@ def main() -> None: ) config = load_config() - DATA_OUTPUT_DIR = config.get("dataDir", "./data") - os.makedirs(DATA_OUTPUT_DIR, exist_ok=True) + data_output_dir = config.get("dataDir", "./data") + os.makedirs(data_output_dir, exist_ok=True) - # Build from config: name -> repo slug all_targets: dict[str, str] = { repo["name"]: repo["repo"] for repo in config.get("repositories", []) @@ -424,7 +347,8 @@ def main() -> None: if args.reprocess: logger.info("Re-processing period: %s", args.reprocess) - # Bound top-level workers by CPU count + import concurrent.futures + max_top_level_workers = min( len(selected_targets), int(os.getenv("MAX_TOP_LEVEL_WORKERS", os.cpu_count() or 1)), @@ -437,7 +361,7 @@ def main() -> None: ) as executor: futures = { executor.submit( - process_repository, slug, DATA_OUTPUT_DIR, args.reprocess + process_repository, slug, data_output_dir, args.reprocess ): name for name, slug in selected_targets.items() } @@ -446,7 +370,7 @@ def main() -> None: try: future.result() logger.info("✓ %s completed successfully.", name) - except Exception as e: # pylint: disable=broad-exception-caught + except Exception as e: # noqa: BLE001 logger.error("Failed to process %s: %s", name, e) overall_elapsed = time.perf_counter() - overall_start diff --git a/scripts/cleanup_data.py b/scripts/cleanup_data.py index 8db9cb4..433fc75 100644 --- a/scripts/cleanup_data.py +++ b/scripts/cleanup_data.py @@ -1,26 +1,41 @@ """ -Module for cleaning up and minifying past snapshot data JSONs. +Clean up and minify past snapshot data JSONs for the Theseus pipeline. + +Per-file transformations (no logic changes): + +1. Removes the redundant ``total_lines`` field from every snapshot. +2. Removes future-year keys from every snapshot's ``composition`` dict + (e.g. a ``2023-06`` snapshot cannot contain ``2026`` entries). +3. Minifies the output JSON (no whitespace) to save disk space. + +Fossil data is left untouched — only snapshot content is cleaned. """ -import json -import os +import logging import sys from pathlib import Path -_SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__)) -if _SCRIPTS_DIR not in sys.path: - sys.path.insert(0, _SCRIPTS_DIR) +# Ensure sibling imports work in all invocation contexts +_SCRIPTS_DIR = Path(__file__).resolve().parent +if str(_SCRIPTS_DIR) not in sys.path: + sys.path.insert(0, str(_SCRIPTS_DIR)) +from _data_io import load_snapshot_data, save_snapshot_data from _utils import load_config +logger = logging.getLogger(__name__) + def cleanup_data(data_dir: str) -> bool: """ - Cleans up all JSON data files in the specified directory. - - Removes 'total_lines' (redundant) - - Removes future-year keys in 'composition' - - Minifies output - Returns True if an error occurred, False otherwise. + Clean and minify all JSON data files in the specified directory. + + For each file, snapshots are cleaned (remove ``total_lines``, remove + future-year composition keys) and the entire file is written back + minified. Fossil data is preserved unchanged. + + :param data_dir: Path to the ``data/`` directory. + :return: ``True`` if any errors occurred, ``False`` otherwise. """ data_path = Path(data_dir) if not data_path.exists() or not data_path.is_dir(): @@ -40,46 +55,41 @@ def cleanup_data(data_dir: str) -> bool: print(f"Processing {json_file.name}...") try: - with open(json_file, "r", encoding="utf-8") as f: - data = json.load(f) - - # Handle both list and object schemas - snapshots = data.get("snapshots", []) if isinstance(data, dict) else data + data = load_snapshot_data(str(json_file)) + snapshots = data["snapshots"] + fossils = data.get("fossils", {}) for snapshot in snapshots: - # 1. Remove redundant total_lines if "total_lines" in snapshot: del snapshot["total_lines"] - # 2. Filter future years snapshot_date = snapshot.get("snapshot_date") if snapshot_date: max_year = int(snapshot_date[:4]) composition = snapshot.get("composition", {}) keys_to_remove = [ - year for year in composition.keys() if int(year) > max_year + year_key + for year_key in composition.keys() + if int(year_key) > max_year ] for key in keys_to_remove: del composition[key] - # Write back with original schema - if isinstance(data, dict): - data["snapshots"] = snapshots - with open(json_file, "w", encoding="utf-8") as f: - json.dump(data, f, separators=(",", ":")) - else: - with open(json_file, "w", encoding="utf-8") as f: - json.dump(snapshots, f, separators=(",", ":")) + save_snapshot_data(str(json_file), snapshots, fossils) print(f" Successfully optimized and minified {json_file.name}") - except Exception as e: + except Exception as e: # noqa: BLE001 print(f" Error processing {json_file.name}: {e}") had_failures = True return had_failures -def main(): +def main() -> None: + """ + Entry point for data cleanup. + """ + logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") config = load_config() data_dir = config.get("dataDir", "./data") if cleanup_data(data_dir): diff --git a/scripts/run_pipeline.py b/scripts/run_pipeline.py new file mode 100644 index 0000000..b50b22f --- /dev/null +++ b/scripts/run_pipeline.py @@ -0,0 +1,212 @@ +""" +Unified orchestration script for the Theseus data pipeline. + +Runs all three stages in sequence on one or more repositories: + +1. **Analyse** (snapshot generation via ``analyse_repository``) +2. **Fossils** (genesis + survivor via ``add_fossils``) +3. **Cleanup** (future-year filtering + minification via ``cleanup_data``) + +Fossil data model +----------------- +The pipeline discovers two fossil types per repository: + +**Genesis** (Historical Fossil) + The single oldest-authored line ever written in the repository. Found by + blaming only files added in each of the earliest commits (sorted by + author-time), scanning until *stale_limit* consecutive commits fail to + improve the oldest-yet result. + +**Survivor** (Living Fossil) + The single oldest-authored line that still exists at the current HEAD. + Found by blaming every tracked file on the default branch and returning + the line with the smallest author-timestamp. + +Each fossil stores: ``{timestamp, file, content, year, commit, view_commit, line}``. +""" + +import logging +import os +import sys +import time + +# Ensure sibling imports work in all invocation contexts +_SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__)) +if _SCRIPTS_DIR not in sys.path: + sys.path.insert(0, _SCRIPTS_DIR) + +from _utils import load_config +from cleanup_data import cleanup_data as run_cleanup + +logger = logging.getLogger(__name__) + + +def run_pipeline( + repo: str | None = None, + reprocess: str | None = None, + update_survivor: bool = False, +) -> bool: + """ + Run the full pipeline (analyse → fossils → cleanup) for all repositories. + + :param repo: Optional repository name to process (None = all repos). + :param reprocess: Optional ``YYYY-MM`` period to force re-process. + :param update_survivor: If ``True``, skip genesis scan and only refresh the + survivor (living) fossil. Designed for monthly cron ticks. + :return: ``True`` if any stage failed, ``False`` otherwise. + """ + config = load_config() + data_dir = config.get("dataDir", "./data") + os.makedirs(data_dir, exist_ok=True) + + # Build target lists from config + all_repos: list[dict] = config.get("repositories", []) + if not all_repos: + logger.error("No repositories found in configuration.") + return True + + if repo: + selected = [r for r in all_repos if r.get("name") == repo] + if not selected: + logger.error("Unknown repository '%s'.", repo) + return True + logger.info("Pipeline running for single repository: %s", repo) + else: + selected = all_repos + logger.info("Pipeline running for %d repositories.", len(selected)) + + had_failures = False + + # ── Stage 1: Analyse ────────────────────────────────────────────── + logger.info("═══ STAGE 1: Snapshot analysis ═══") + from analyse_repository import ( + process_repository, + ) + + for repo_info in selected: + repo_slug = repo_info.get("repo", "") + repo_name = repo_info.get("name", "") + if not repo_slug or not repo_name: + logger.warning("Skipping repo entry with missing slug/name: %s", repo_info) + continue + + logger.info(" Analysing %s (%s)...", repo_name, repo_slug) + try: + process_repository(repo_slug, data_dir, reprocess) + logger.info(" ✓ %s — snapshot analysis complete.", repo_name) + except Exception as e: # noqa: BLE001 + logger.error(" ✗ %s — snapshot analysis failed: %s", repo_name, e) + had_failures = True + + # ── Stage 2: Fossils ─────────────────────────────────────────────── + from add_fossils import backfill_fossils, update_survivor_fossils + + repo_urls = { + r["name"]: f"https://github.com/{r['repo']}.git" + for r in selected + if "name" in r and "repo" in r + } + + if update_survivor: + logger.info("═══ STAGE 2: Survivor-only refresh ═══") + if repo_urls: + try: + fossil_errors = update_survivor_fossils(data_dir, repo_urls) + if fossil_errors: + had_failures = True + else: + logger.info(" ✓ All survivors refreshed.") + except Exception as e: # noqa: BLE001 + logger.error(" ✗ Survivor stage failed: %s", e) + had_failures = True + else: + logger.warning(" Skipping survivor stage — no valid repos.") + else: + logger.info("═══ STAGE 2: Full fossil discovery ═══") + if repo_urls: + logger.info(" Computing fossils for %d repos...", len(repo_urls)) + try: + fossil_errors = backfill_fossils(data_dir, repo_urls) + if fossil_errors: + had_failures = True + else: + logger.info(" ✓ All fossils computed.") + except Exception as e: # noqa: BLE001 + logger.error(" ✗ Fossil stage failed: %s", e) + had_failures = True + else: + logger.warning(" Skipping fossil stage — no valid repos.") + + # ── Stage 3: Cleanup ─────────────────────────────────────────────── + logger.info("═══ STAGE 3: Data cleanup ═══") + try: + cleanup_errors = run_cleanup(data_dir) + if cleanup_errors: + had_failures = True + else: + logger.info(" ✓ Cleanup complete.") + except Exception as e: # noqa: BLE001 + logger.error(" ✗ Cleanup stage failed: %s", e) + had_failures = True + + return had_failures + + +def main() -> None: + """ + Entry point for the unified pipeline runner. + + CLI flags + --------- + --repo NAME Process only this repository (by config name). + --reprocess YYYY-MM Re-process a specific snapshot period. + --update-survivor Skip genesis scan; refresh only the survivor fossil + (designed for monthly cron ticks). + """ + import argparse + + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + ) + + parser = argparse.ArgumentParser( + description="Run the full Theseus pipeline: analyse → fossils → cleanup." + ) + parser.add_argument( + "--repo", + metavar="NAME", + default=None, + help="Only process this repository (e.g. 'react'). If omitted, all repos are processed.", + ) + parser.add_argument( + "--reprocess", + metavar="YYYY-MM", + default=None, + help="Re-process a specific snapshot period (e.g. '2023-06').", + ) + parser.add_argument( + "--update-survivor", + action="store_true", + help="Skip genesis scan; refresh only the survivor (living) fossil.", + ) + args = parser.parse_args() + + overall_start = time.perf_counter() + had_errors = run_pipeline( + repo=args.repo, + reprocess=args.reprocess, + update_survivor=args.update_survivor, + ) + elapsed = time.perf_counter() - overall_start + + if had_errors: + logger.error("Pipeline finished with errors (%.2f seconds).", elapsed) + sys.exit(1) + + logger.info("Pipeline completed successfully (%.2f seconds).", elapsed) + + +if __name__ == "__main__": + main() diff --git a/tests/test_analyse_repository.py b/tests/test_analyse_repository.py index 3882cfa..87f5a19 100644 --- a/tests/test_analyse_repository.py +++ b/tests/test_analyse_repository.py @@ -1,5 +1,5 @@ """ -Tests for the analyse repository module. +Tests for the snapshot analysis module and its shared dependencies. """ import json @@ -10,15 +10,13 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) # pylint: disable=wrong-import-position,import-error -from scripts.analyse_repository import ( - _filter_snapshots, - _parse_blame_output, - load_existing_state, -) +from scripts._blame import parse_blame_year_counts +from scripts._data_io import load_snapshot_data +from scripts.analyse_repository import _filter_snapshots -class TestParseBlameOutput: - """Tests for the git blame output parser.""" +class TestParseBlameYearCounts: + """Tests for parsing git blame --line-porcelain output into year counts.""" def test_single_file_single_author_year(self): """Test parsing a blame output with a single commit and author.""" @@ -29,7 +27,7 @@ def test_single_file_single_author_year(self): "filename test.py\n" "\tprint('hello world')\n" ) - result = _parse_blame_output(blame_output) + result = parse_blame_year_counts(blame_output) year = datetime.fromtimestamp(1704067200, timezone.utc).strftime("%Y") assert result == {year: 1} @@ -47,7 +45,7 @@ def test_multiple_commits_different_years(self): "filename test.py\n" "\tconst y = 2;\n" ) - result = _parse_blame_output(blame_output) + result = parse_blame_year_counts(blame_output) year_2021 = datetime.fromtimestamp(1609459200, timezone.utc).strftime("%Y") year_2024 = datetime.fromtimestamp(1704067200, timezone.utc).strftime("%Y") assert result[year_2021] == 1 @@ -64,13 +62,13 @@ def test_lines_attributed_to_correct_year(self): "\tline two\n" "\tline three\n" ) - result = _parse_blame_output(blame_output) + result = parse_blame_year_counts(blame_output) year = datetime.fromtimestamp(1609459200, timezone.utc).strftime("%Y") assert result[year] == 3 def test_empty_output(self): """Test parsing an empty blame output.""" - result = _parse_blame_output("") + result = parse_blame_year_counts("") assert result == {} def test_invalid_timestamp_ignored(self): @@ -82,7 +80,7 @@ def test_invalid_timestamp_ignored(self): "filename test.py\n" "\tprint('hello')\n" ) - result = _parse_blame_output(blame_output) + result = parse_blame_year_counts(blame_output) assert result == {} def test_40_and_64_char_hashes(self): @@ -94,13 +92,13 @@ def test_40_and_64_char_hashes(self): "filename test.py\n" "\tprint('hello')\n" ) - result = _parse_blame_output(blame_output) + result = parse_blame_year_counts(blame_output) year = datetime.fromtimestamp(1704067200, timezone.utc).strftime("%Y") assert year in result -class TestLoadExistingState: - """Tests for loading existing JSON state.""" +class TestLoadSnapshotData: + """Tests for loading snapshot data from JSON files.""" def test_load_valid_json(self): """Test loading a correctly formatted existing JSON state.""" @@ -120,8 +118,7 @@ def test_load_valid_json(self): json.dump(data, f) f.flush() - result = load_existing_state(f.name) - # load_existing_state always returns {"snapshots": [...], "fossils": {}} + result = load_snapshot_data(f.name) assert "snapshots" in result assert "fossils" in result snapshots = result["snapshots"] @@ -132,7 +129,7 @@ def test_load_valid_json(self): def test_file_not_exists(self): """Test loading state when the requested file does not exist, expecting a blank default structure.""" - result = load_existing_state("/nonexistent/path/data.json") + result = load_snapshot_data("/nonexistent/path/data.json") assert result == {"snapshots": [], "fossils": {}} def test_corrupted_json_returns_empty(self): @@ -141,7 +138,7 @@ def test_corrupted_json_returns_empty(self): f.write("not valid json {") f.flush() - result = load_existing_state(f.name) + result = load_snapshot_data(f.name) assert result == {"snapshots": [], "fossils": {}} os.unlink(f.name) From d5034f31f9cbbfb98edb36646f0a5c3e2c5cbfcc Mon Sep 17 00:00:00 2001 From: Asif Sayyed Date: Sun, 31 May 2026 17:48:25 +0530 Subject: [PATCH 8/9] #33 fixed minor bugs caught during code review --- pyproject.toml | 3 +++ scripts/_utils.py | 33 ++++++++++++++++----------------- theseus.config.json | 2 +- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 58c6973..b30ea22 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,9 @@ max-line-length = 120 [tool.pylint.messages_control] disable = ["fixme"] +[tool.poetry] +package-mode = false + [build-system] requires = ["poetry-core>=2.2.0,<3.0.0"] build-backend = "poetry.core.masonry.api" diff --git a/scripts/_utils.py b/scripts/_utils.py index 2b72de1..93c42ec 100644 --- a/scripts/_utils.py +++ b/scripts/_utils.py @@ -12,6 +12,7 @@ import logging import os import shutil +import stat import subprocess import sys import time @@ -160,25 +161,23 @@ def remove_path(path: str) -> None: except (subprocess.SubprocessError, OSError): pass - # Fallback: retry with shutil.rmtree + # Fallback: retry with shutil.rmtree, fixing permissions on each retry + def handle_remove_readonly(func, path, _exc_info): + try: + current_mode = os.stat(path).st_mode + os.chmod( + path, + current_mode | stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH, + ) + func(path) + except PermissionError: + pass + except Exception: # noqa: BLE001 + pass + for attempt in range(3): try: - shutil.rmtree(path, ignore_errors=False) - - def handle_remove_readonly(func, path, _exc_info): - try: - current_mode = os.stat(path).st_mode - os.chmod( - path, - current_mode | stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH, - ) - func(path) - except PermissionError: - pass - except Exception: # noqa: BLE001 - pass - - shutil.rmtree(path, onexc=handle_remove_readonly) + shutil.rmtree(path, onerror=handle_remove_readonly) break except Exception: # noqa: BLE001 if attempt < 2: diff --git a/theseus.config.json b/theseus.config.json index 709df00..49cba10 100644 --- a/theseus.config.json +++ b/theseus.config.json @@ -41,7 +41,7 @@ "description": "Introduced Hooks for state management without classes." }, { - "date": "2024-06", + "date": "2024-12", "title": "React 19 major update", "description": "Added Server Components and new form handling." } From 0489db6cb96a79b436c9ae661c0d7c4b9680cb10 Mon Sep 17 00:00:00 2001 From: Asif Sayyed Date: Sun, 31 May 2026 17:56:00 +0530 Subject: [PATCH 9/9] #33 fixed code review errors --- .github/workflows/theseus-engine.yml | 2 +- scripts/_data_io.py | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/.github/workflows/theseus-engine.yml b/.github/workflows/theseus-engine.yml index e256e91..025b7d2 100644 --- a/.github/workflows/theseus-engine.yml +++ b/.github/workflows/theseus-engine.yml @@ -38,7 +38,7 @@ jobs: - name: Create pull request for data updates if: success() - uses: peter-evans/create-pull-request@v6 + uses: peter-evans/create-pull-request@b1ddad2c994a25fbc81a28b3ec0e368bb2021c50 # v6.0.0 with: token: ${{ secrets.GITHUB_TOKEN }} commit-message: "chore: update theseus persistence data across all repos" diff --git a/scripts/_data_io.py b/scripts/_data_io.py index 541c434..ba2c21f 100644 --- a/scripts/_data_io.py +++ b/scripts/_data_io.py @@ -67,7 +67,15 @@ def load_snapshot_data(file_path: str) -> dict: data = json.load(f) if isinstance(data, list): return {"snapshots": data, "fossils": {}} - return data + if isinstance(data, dict): + snapshots = data.get("snapshots") + if not isinstance(snapshots, list): + snapshots = [] + fossils = data.get("fossils") + if not isinstance(fossils, dict): + fossils = {} + return {"snapshots": snapshots, "fossils": fossils} + return {"snapshots": [], "fossils": {}} except json.JSONDecodeError: logger.warning("%s is corrupted, starting fresh.", file_path) return {"snapshots": [], "fossils": {}}