From 9f659ddb23091bdf75bca01881823c1639e2afc6 Mon Sep 17 00:00:00 2001 From: Asif Sayyed Date: Sat, 4 Apr 2026 19:35:51 +0530 Subject: [PATCH 1/4] #3 made the base html, css and js template --- app.js | 313 +++++++++++++++++++++++++++++++++ data/manifest.json | 7 + index.html | 72 ++++++++ scripts/analyse_repository.py | 4 +- style.css | 316 ++++++++++++++++++++++++++++++++++ 5 files changed, 709 insertions(+), 3 deletions(-) create mode 100644 app.js create mode 100644 data/manifest.json create mode 100644 index.html create mode 100644 style.css diff --git a/app.js b/app.js new file mode 100644 index 0000000..eeb994f --- /dev/null +++ b/app.js @@ -0,0 +1,313 @@ +/** + * Ship of Theseus - Code Visualizer + * Core Logic (Vanilla JS + SVG) + */ + +class TheseusVisualizer { + constructor() { + this.manifest = null; + this.currentData = null; + this.currentRepo = null; + this.canvas = document.getElementById('main-chart'); + this.tooltip = document.getElementById('tooltip'); + this.repoSelector = document.getElementById('repo-selector'); + this.repoDescription = document.getElementById('repo-description'); + this.loadingState = document.getElementById('chart-loading'); + + this.margin = { top: 40, right: 20, bottom: 60, left: 60 }; + this.years = []; + this.points = []; + + this.init(); + } + + async init() { + try { + const response = await fetch('data/manifest.json'); + let data = await response.json(); + + // Normalize manifest to array + this.manifest = Array.isArray(data) ? data : [data]; + + this.renderSelectors(); + if (this.manifest.length > 0) { + this.loadRepo(this.manifest[0].name); + } + } catch (err) { + this.showError("Failed to load repository manifest."); + } + + window.addEventListener('resize', () => this.debouncedRender()); + } + + renderSelectors() { + this.repoSelector.innerHTML = ''; + this.manifest.forEach(repo => { + const btn = document.createElement('button'); + btn.className = 'repo-btn'; + btn.textContent = repo.name.replace(/-/g, ' '); + btn.dataset.repo = repo.name; + btn.onclick = () => this.loadRepo(repo.name); + this.repoSelector.appendChild(btn); + }); + } + + async loadRepo(repoName) { + if (this.currentRepo === repoName) return; + + this.currentRepo = repoName; + this.updateActiveBtn(repoName); + this.showLoading(true); + this.hideError(); + + try { + const repoInfo = this.manifest.find(r => r.name === repoName); + const fileName = repoInfo.file; + this.repoDescription.textContent = repoInfo.description || ''; + + const response = await fetch(`data/${fileName}`); + this.currentData = await response.json(); + + this.processData(); + this.renderChart(); + this.updateInsights(); + this.showLoading(false); + } catch (err) { + console.error(err); + this.showError(`Failed to load data for ${repoName}`); + } + } + + updateActiveBtn(name) { + document.querySelectorAll('.repo-btn').forEach(btn => { + btn.classList.toggle('active', btn.dataset.repo === name); + }); + } + + processData() { + // Collect all composing years + const yearSet = new Set(); + this.currentData.forEach(d => { + Object.keys(d.composition).forEach(y => yearSet.add(y)); + }); + this.years = Array.from(yearSet).sort(); + + // Map data points + this.points = this.currentData.map(d => { + const point = { + date: d.snapshot_date, + total: d.total_lines, + composition: d.composition, + stack: {} + }; + + let cumulative = 0; + this.years.forEach(year => { + const value = d.composition[year] || 0; + point.stack[year] = { + start: cumulative, + end: cumulative + value, + value: value + }; + cumulative += value; + }); + + return point; + }); + } + + renderChart() { + const { width, height } = this.canvas.getBoundingClientRect(); + this.canvas.innerHTML = ''; // Clear previous + + // Define Gradients + const defs = this.createSVGElement('defs'); + this.years.forEach((year, i) => { + const hue = (180 + i * 40) % 360; // Spread colors starting from Cyan + const grad = this.createSVGElement('linearGradient', { + id: `grad-${year}`, + x1: '0%', y1: '0%', x2: '0%', y2: '100%' + }); + grad.appendChild(this.createSVGElement('stop', { offset: '0%', 'stop-color': `hsl(${hue}, 70%, 55%)`, 'stop-opacity': 0.8 })); + grad.appendChild(this.createSVGElement('stop', { offset: '100%', 'stop-color': `hsl(${hue}, 70%, 55%)`, 'stop-opacity': 0.1 })); + defs.appendChild(grad); + }); + this.canvas.appendChild(defs); + + const chartWidth = width - this.margin.left - this.margin.right; + const chartHeight = height - this.margin.top - this.margin.bottom; + + // Scales + const xScale = (i) => (i / (this.points.length - 1)) * chartWidth; + const maxVal = Math.max(...this.points.map(p => p.total)); + const yScale = (v) => chartHeight - (v / maxVal) * chartHeight; + + // Render Areas + const group = this.createSVGElement('g', { transform: `translate(${this.margin.left}, ${this.margin.top})` }); + + this.years.forEach(year => { + const pathData = this.points.map((p, i) => `${xScale(i)},${yScale(p.stack[year].end)}`); + const bottomData = this.points.map((p, i) => `${xScale(i)},${yScale(p.stack[year].start)}`).reverse(); + + const areaPath = this.createSVGElement('path', { + d: `M${pathData.join(' L')} L${bottomData.join(' L')} Z`, + fill: `url(#grad-${year})`, + class: 'chart-area', + 'data-year': year + }); + + // Initial state for animation + areaPath.style.opacity = '0'; + areaPath.style.transition = 'opacity 1.5s ease-out'; + group.appendChild(areaPath); + + setTimeout(() => areaPath.style.opacity = '1', 50); + }); + + // Axes (Subtle) + this.renderAxes(group, chartWidth, chartHeight, xScale, yScale, maxVal); + + // Interaction Overlay + const overlay = this.createSVGElement('rect', { + width: chartWidth, + height: chartHeight, + fill: 'transparent' + }); + + overlay.onmousemove = (e) => this.handleMouseMove(e, chartWidth, xScale); + overlay.onmouseleave = () => this.hideTooltip(); + group.appendChild(overlay); + + this.canvas.appendChild(group); + } + + renderAxes(group, width, height, xScale, yScale, maxVal) { + // Horizontal Grid Lines + const tickCount = 5; + for (let i = 0; i <= tickCount; i++) { + const val = (i / tickCount) * maxVal; + const y = yScale(val); + + const line = this.createSVGElement('line', { + x1: 0, x2: width, y1: y, y2: y, + stroke: '#374151', 'stroke-dasharray': '3,3', 'stroke-opacity': 0.5 + }); + group.appendChild(line); + + const label = this.createSVGElement('text', { + x: -10, y: y + 4, 'text-anchor': 'end', fill: '#6b7280', 'font-size': '10px' + }); + label.textContent = val >= 1000 ? `${(val/1000).toFixed(1)}k` : Math.round(val); + group.appendChild(label); + } + + // X Axis labels (Show every few snapshots) + const xStep = Math.max(1, Math.floor(this.points.length / 6)); + this.points.forEach((p, i) => { + if (i % xStep === 0 || p.date.endsWith('-01')) { + const label = this.createSVGElement('text', { + x: xScale(i), y: height + 25, 'text-anchor': 'middle', fill: '#6b7280', 'font-size': '10px' + }); + label.textContent = p.date.endsWith('-01') ? p.date.split('-')[0] : (i % xStep === 0 ? p.date : ''); + if (label.textContent) group.appendChild(label); + } + }); + } + + handleMouseMove(e, chartWidth, xScale) { + const svgRect = this.canvas.getBoundingClientRect(); + const mouseX = e.clientX - svgRect.left - this.margin.left; + + // Find nearest point + const index = Math.round((mouseX / chartWidth) * (this.points.length - 1)); + if (index >= 0 && index < this.points.length) { + const p = this.points[index]; + this.showTooltip(p, e.clientX, e.clientY); + } + } + + showTooltip(point, x, y) { + this.tooltip.classList.remove('hidden'); + this.tooltip.style.left = `${x + 15}px`; + this.tooltip.style.top = `${y + 15}px`; + + let compositionHtml = ''; + this.years.slice().sort((a,b) => b-a).forEach(year => { + const val = point.composition[year] || 0; + if (val > 0) { + const pct = ((val / point.total) * 100).toFixed(1); + compositionHtml += ` +
+ Code from ${year}: + ${val.toLocaleString()} lines (${pct}%) +
+ `; + } + }); + + this.tooltip.innerHTML = ` +
Snapshot: ${point.date}
+
+ Total Size: + ${point.total.toLocaleString()} lines +
+ ${compositionHtml} + `; + } + + hideTooltip() { + this.tooltip.classList.add('hidden'); + } + + updateInsights() { + if (!this.currentData || this.currentData.length === 0) return; + + const first = this.currentData[0]; + const last = this.currentData[this.currentData.length - 1]; + + // % Replaced calculation + const originalYear = Object.keys(first.composition)[0]; + const originalLinesInLast = last.composition[originalYear] || 0; + const originalLinesInFirst = first.total_lines; + + const replaced = ((originalLinesInFirst - originalLinesInLast) / originalLinesInFirst) * 100; + document.getElementById('percent-replaced').textContent = `${Math.min(100, Math.max(0, replaced)).toFixed(1)}%`; + + // Oldest line logic + const oldestYear = this.years[0]; + document.getElementById('oldest-line').textContent = oldestYear; + } + + // Utilities + createSVGElement(tag, attrs = {}) { + const el = document.createElementNS('http://www.w3.org/2000/svg', tag); + Object.entries(attrs).forEach(([key, val]) => el.setAttribute(key, val)); + return el; + } + + showLoading(show) { + this.loadingState.classList.toggle('hidden', !show); + } + + showError(msg) { + const error = document.getElementById('chart-error'); + error.textContent = msg; + error.classList.remove('hidden'); + } + + hideError() { + document.getElementById('chart-error').classList.add('hidden'); + } + + debouncedRender() { + clearTimeout(this.resizeTimer); + this.resizeTimer = setTimeout(() => { + if (this.currentData) this.renderChart(); + }, 100); + } +} + +// Spark the void +document.addEventListener('DOMContentLoaded', () => { + new TheseusVisualizer(); +}); diff --git a/data/manifest.json b/data/manifest.json new file mode 100644 index 0000000..48a28df --- /dev/null +++ b/data/manifest.json @@ -0,0 +1,7 @@ +[ + { + "name": "claude-code", + "file": "claude-code_data.json", + "description": "Claude's efficient, local-first coding CLI tool." + } +] diff --git a/index.html b/index.html new file mode 100644 index 0000000..4fca236 --- /dev/null +++ b/index.html @@ -0,0 +1,72 @@ + + + + + + Ship of Theseus | Code Visualizer + + + + + + + +
+ +
+
+

The Ship of Theseus

+

Does a codebase remain the same if every line is replaced? A monthly pulse on software entropy.

+ +
+ +
+

+
+ +
+
[ Scanning repository history... ]
+ + +
+ + +
+
+ +
+
+

Evolution Metrics

+
+

--

+

of original code replaced

+
+
+
+

Ancestry Analysis

+
+

--

+

Oldest surviving logic (Year)

+
+
+
+ +
+
+

How to read this chart

+

The X-axis represents time moving forward. The Y-axis shows total lines of code. Each colored band represents the surviving code originally written in a specific year.

+
+
+

The Architecture

+

This data is generated monthly via a static analysis pipeline. It dynamically scans the repository history and outputs a structured JSON file to minimize visual overhead in the browser.

+
+
+ +
+ +
+
+ + + + diff --git a/scripts/analyse_repository.py b/scripts/analyse_repository.py index 12519b9..67a78fe 100644 --- a/scripts/analyse_repository.py +++ b/scripts/analyse_repository.py @@ -295,9 +295,7 @@ def process_repository(repo_slug: str, data_dir: str) -> None: os.makedirs(DATA_OUTPUT_DIR, exist_ok=True) # The Case Studies: Start with this one to benchmark. - TARGETS = [ - "anthropics/claude-code", - ] + TARGETS = ["anthropics/claude-code", "facebook/react", "langchain-ai/langchain"] overall_start = time.perf_counter() for target in TARGETS: diff --git a/style.css b/style.css new file mode 100644 index 0000000..4caf743 --- /dev/null +++ b/style.css @@ -0,0 +1,316 @@ +:root { + --bg-void: #0a0a0c; + --text-primary: #e0e0e0; + --text-secondary: #9ca3af; + --accent-cyan: hsl(180, 70%, 55%); + --accent-purple: hsl(270, 70%, 55%); + --accent-orange: hsl(30, 70%, 55%); + --glass-background: rgba(17, 19, 25, 0.7); + --glass-border: rgba(255, 255, 255, 0.1); + --font-serif: "Playfair Display", serif; + --font-mono: "JetBrains Mono", monospace; +} + +* { + margin: 0; + padding: 0; + box-sizing: border-box; +} + +body { + background-color: var(--bg-void); + color: var(--text-primary); + font-family: var(--font-mono); + line-height: 1.6; + overflow-x: hidden; + min-height: 100vh; +} + +/* Noise overlay */ +.noise-overlay { + position: fixed; + top: 0; + left: 0; + width: 100%; + height: 100%; + pointer-events: none; + opacity: 0.05; + z-index: 1000; + background: url('https://grainy-gradients.vercel.app/noise.svg'); +} + +.container { + max-width: 1200px; + margin: 0 auto; + padding: 4rem 2rem; + display: flex; + flex-direction: column; + gap: 3rem; +} + +/* Hero Section */ +.hero { + text-align: center; + display: flex; + flex-direction: column; + align-items: center; + gap: 1.5rem; +} + +.title { + font-family: var(--font-serif); + font-size: 4rem; + font-weight: 900; + letter-spacing: -0.02em; + background: linear-gradient(180deg, #fff, #999); + -webkit-background-clip: text; + background-clip: text; + -webkit-text-fill-color: transparent; + text-shadow: 0 0 30px rgba(255, 255, 255, 0.1); +} + +.subtitle { + font-size: 1.1rem; + color: var(--text-secondary); + max-width: 600px; +} + + +/* Selector Pill */ +.selector-pill { + display: flex; + gap: 0.5rem; + padding: 0.5rem; + background: rgba(255, 255, 255, 0.05); + border: 1px solid var(--glass-border); + border-radius: 9999px; + backdrop-filter: blur(8px); +} + +.repo-btn { + padding: 0.75rem 1.5rem; + border-radius: 9999px; + border: none; + background: transparent; + color: var(--text-secondary); + font-family: var(--font-mono); + font-size: 0.9rem; + cursor: pointer; + transition: all 0.3s ease; + opacity: 0.6; +} + +.repo-btn:hover { + opacity: 1; + color: var(--text-primary); +} + +.repo-btn.active { + background: rgba(255, 255, 255, 0.1); + color: var(--text-primary); + opacity: 1; + box-shadow: 0 0 20px rgba(255, 255, 255, 0.05); + border: 1px solid rgba(255, 255, 255, 0.15); +} + +.repo-description { + font-style: italic; + font-size: 0.9rem; + color: var(--text-secondary); + opacity: 0.7; + margin-top: -0.5rem; +} + +/* Visualization Canvas */ +.glass-panel { + background: var(--glass-background); + border: 1px solid var(--glass-border); + border-radius: 2rem; + backdrop-filter: blur(20px); + overflow: hidden; + position: relative; + box-shadow: 0 25px 50px -12px rgba(0, 0, 0, 0.5); +} + +.visualization-canvas { + min-height: 500px; + display: flex; + flex-direction: column; + padding: 2rem; +} + +.loading-state { + position: absolute; + top: 50%; + left: 50%; + transform: translate(-50%, -50%); + font-size: 1.2rem; + animation: pulse 2s infinite; +} + +.error-banner { + background: rgba(153, 27, 27, 0.2); + color: #f87171; + border: 1px solid rgba(153, 27, 27, 0.4); + padding: 1rem; + border-radius: 1rem; + margin-bottom: 1rem; + text-align: center; +} + +.hidden { + display: none !important; +} + +@keyframes pulse { + 0% { opacity: 0.3; } + 50% { opacity: 0.8; } + 100% { opacity: 0.3; } +} + +/* Chart Canvas */ +.chart-wrapper { + flex: 1; + width: 100%; + position: relative; +} + +svg#main-chart { + width: 100%; + height: 100%; + min-height: 400px; +} + +/* Custom Tooltip */ +.custom-tooltip { + position: absolute; + pointer-events: none; + background: rgba(10, 10, 12, 0.9); + border: 1px solid var(--glass-border); + padding: 1rem; + border-radius: 0.75rem; + backdrop-filter: blur(12px); + z-index: 100; + min-width: 200px; + box-shadow: 0 10px 30px rgba(0,0,0,0.5); +} + +.tooltip-header { + font-size: 0.8rem; + color: var(--text-secondary); + margin-bottom: 0.5rem; + border-bottom: 1px solid var(--glass-border); + padding-bottom: 0.25rem; +} + +.tooltip-item { + display: flex; + justify-content: space-between; + font-size: 0.9rem; + margin-top: 0.25rem; +} + +/* Insights Cards */ +.insights-grid { + display: grid; + grid-template-columns: 1fr; + gap: 2rem; +} + +@media (min-width: 768px) { + .insights-grid { + grid-template-columns: repeat(2, 1fr); + } +} + +.insight-card { + padding: 2.5rem; + transition: all 0.4s cubic-bezier(0.175, 0.885, 0.32, 1.275); +} + +.insight-card:hover { + transform: translateY(-8px); + border-color: rgba(255, 255, 255, 0.3); + box-shadow: 0 20px 40px -10px rgba(0, 0, 0, 0.6); +} + +.card-title { + font-size: 0.9rem; + text-transform: uppercase; + letter-spacing: 0.1em; + color: var(--text-secondary); + margin-bottom: 1.5rem; +} + +.metric-value { + font-family: var(--font-serif); + font-size: 3.5rem; + font-weight: 700; + line-height: 1; + margin-bottom: 0.5rem; + background: linear-gradient(135deg, var(--accent-cyan), var(--accent-purple)); + -webkit-background-clip: text; + background-clip: text; + -webkit-text-fill-color: transparent; +} + +.metric-label { + font-size: 0.9rem; + color: var(--text-secondary); +} + +/* Info Grid */ +.info-grid { + display: grid; + grid-template-columns: 1fr; + gap: 2rem; +} + +@media (min-width: 768px) { + .info-grid { + grid-template-columns: repeat(2, 1fr); + } +} + +.info-card { + padding: 1.5rem; + font-size: 0.9rem; +} + +.info-card .card-title { + font-size: 0.8rem; + margin-bottom: 0.75rem; +} + +.card-text { + line-height: 1.6; + color: var(--text-secondary); +} + +/* Footer */ +.footer { + text-align: center; + padding-top: 2rem; + border-top: 1px solid var(--glass-border); +} + +.footer-text { + font-size: 0.8rem; + color: var(--text-secondary); + opacity: 0.5; +} + +/* Scrollbar */ +::-webkit-scrollbar { + width: 8px; +} +::-webkit-scrollbar-track { + background: var(--bg-void); +} +::-webkit-scrollbar-thumb { + background: rgba(255, 255, 255, 0.1); + border-radius: 4px; +} +::-webkit-scrollbar-thumb:hover { + background: rgba(255, 255, 255, 0.2); +} From 1f51102a1bc3ba4737373a5d85927a7a3e7eb6ab Mon Sep 17 00:00:00 2001 From: Asif Sayyed Date: Sun, 5 Apr 2026 00:28:48 +0530 Subject: [PATCH 2/4] #10 updated the script to be fault tolerant --- data/langchain_data.json | 44 ++++++ data/react_data.json | 131 +++++++++++++++++ scripts/analyse_repository.py | 270 +++++++++++++++++++++------------- 3 files changed, 341 insertions(+), 104 deletions(-) create mode 100644 data/langchain_data.json create mode 100644 data/react_data.json diff --git a/data/langchain_data.json b/data/langchain_data.json new file mode 100644 index 0000000..8bc012e --- /dev/null +++ b/data/langchain_data.json @@ -0,0 +1,44 @@ +[ + { + "snapshot_date": "2022-12", + "total_lines": 15774, + "composition": { + "2022": 15774 + } + }, + { + "snapshot_date": "2023-03", + "total_lines": 90222, + "composition": { + "2022": 28253, + "2023": 61969 + } + }, + { + "snapshot_date": "2023-06", + "total_lines": 509660, + "composition": { + "2023": 487975, + "2022": 21681, + "2026": 4 + } + }, + { + "snapshot_date": "2023-09", + "total_lines": 697569, + "composition": { + "2023": 681274, + "2022": 16291, + "2026": 4 + } + }, + { + "snapshot_date": "2023-12", + "total_lines": 895902, + "composition": { + "2023": 881194, + "2022": 14704, + "2026": 4 + } + } +] \ No newline at end of file diff --git a/data/react_data.json b/data/react_data.json new file mode 100644 index 0000000..29a2864 --- /dev/null +++ b/data/react_data.json @@ -0,0 +1,131 @@ +[ + { + "snapshot_date": "2013-06", + "total_lines": 44058, + "composition": { + "2013": 44058 + } + }, + { + "snapshot_date": "2013-09", + "total_lines": 56640, + "composition": { + "2013": 56640 + } + }, + { + "snapshot_date": "2013-12", + "total_lines": 103418, + "composition": { + "2013": 103418 + } + }, + { + "snapshot_date": "2014-03", + "total_lines": 139132, + "composition": { + "2014": 14460, + "2013": 124672 + } + }, + { + "snapshot_date": "2014-06", + "total_lines": 150788, + "composition": { + "2013": 122748, + "2014": 28040 + } + }, + { + "snapshot_date": "2014-09", + "total_lines": 164695, + "composition": { + "2013": 120753, + "2014": 43942 + } + }, + { + "snapshot_date": "2014-12", + "total_lines": 176099, + "composition": { + "2013": 115042, + "2014": 61057 + } + }, + { + "snapshot_date": "2015-03", + "total_lines": 192854, + "composition": { + "2015": 12814, + "2013": 111731, + "2014": 68309 + } + }, + { + "snapshot_date": "2015-06", + "total_lines": 272822, + "composition": { + "2015": 99379, + "2014": 64525, + "2013": 108918 + } + }, + { + "snapshot_date": "2015-09", + "total_lines": 285176, + "composition": { + "2015": 124048, + "2014": 61377, + "2013": 99751 + } + }, + { + "snapshot_date": "2015-12", + "total_lines": 309834, + "composition": { + "2013": 97720, + "2015": 146057, + "2014": 66057 + } + }, + { + "snapshot_date": "2016-03", + "total_lines": 323735, + "composition": { + "2015": 152293, + "2016": 8454, + "2014": 65616, + "2013": 97372 + } + }, + { + "snapshot_date": "2016-06", + "total_lines": 352278, + "composition": { + "2013": 92612, + "2015": 145240, + "2014": 58111, + "2016": 56315 + } + }, + { + "snapshot_date": "2016-09", + "total_lines": 374264, + "composition": { + "2016": 97634, + "2014": 56984, + "2015": 128249, + "2013": 91397 + } + }, + { + "snapshot_date": "2016-12", + "total_lines": 400419, + "composition": { + "2013": 87455, + "2015": 93943, + "2014": 53615, + "2016": 165406 + } + } +] \ No newline at end of file diff --git a/scripts/analyse_repository.py b/scripts/analyse_repository.py index 12519b9..175ff42 100644 --- a/scripts/analyse_repository.py +++ b/scripts/analyse_repository.py @@ -1,35 +1,21 @@ """ This script is responsible for doing the heavy lifting. -Processes monthly snapshots incrementally to track code age distribution. +Processes repository snapshots incrementally to track code age distribution. +Uses quarterly resolution for historical data (pre-2025) and monthly for recent data (2025+). """ import concurrent.futures import json +import logging import os import shutil import subprocess import time from collections import defaultdict from datetime import datetime, timezone -from functools import wraps +from itertools import groupby - -def timer(func): - """ - A decorator that prints the execution time of the function it wraps. - Used for benchmarking sequential vs. concurrent execution optimizations. - """ - - @wraps(func) - def wrapper(*args, **kwargs): - start_time = time.perf_counter() - result = func(*args, **kwargs) - end_time = time.perf_counter() - execution_time = end_time - start_time - print(f"⏱️ [TIMER] '{func.__name__}' executed in {execution_time:.4f} seconds") - return result - - return wrapper +logger = logging.getLogger(__name__) def _run_command(cmd: list[str], cwd: str | None = None) -> str: @@ -65,46 +51,50 @@ def clone_repository(repo_slug: str, clone_dir: str) -> None: :param repo_slug: The GitHub repository identifier (e.g., 'facebook/react'). :param clone_dir: The local directory where the repository should be cloned. """ - print(f"Cloning {repo_slug} into {clone_dir}...") + logger.info("Cloning %s into %s...", repo_slug, clone_dir) repo_url = f"https://github.com/{repo_slug}.git" _run_command(["git", "clone", repo_url, clone_dir]) -def get_monthly_snapshots(repo_path: str) -> list[tuple[str, str]]: +def get_snapshots(repo_path: str) -> list[tuple[str, str]]: """ - Identify one commit per month to act as a historical snapshot. + Identify commits for snapshots: quarterly for pre-2025, monthly for 2025+. + + Quarterly uses the last month of each quarter: 03, 06, 09, 12. :param repo_path: Path to the git repository. :return: A list of tuples, each containing a 'YYYY-MM' period and the corresponding commit hash. - i.e., [(period, commit_hash), ...] """ log_output = _run_command( cmd=["git", "log", "--pretty=format:%H|%cI"], cwd=repo_path ) - snapshots: dict = {} + snapshots: dict[str, str] = {} for line in log_output.splitlines(): if not line: continue commit_hash, commit_date = line.split("|") - - # We slice the first 7 characters of the ISO to get the 'YYYY-MM' period period = commit_date[:7] - - # Git log outputs newest commit first. By assigning to the dictionary, - # the last commit processed for a month overwrites earlier ones, - # leaving us with the very first commit of that specific month snapshots[period] = commit_hash - return sorted(snapshots.items(), key=lambda x: x[0]) + quarterly_months = {"03", "06", "09", "12"} + filtered_snapshots: dict[str, str] = {} + + for period, commit_hash in snapshots.items(): + year = period[:4] + month = period[5:7] + + if int(year) >= 2025: + filtered_snapshots[period] = commit_hash + elif month in quarterly_months: + filtered_snapshots[period] = commit_hash + + return sorted(filtered_snapshots.items(), key=lambda x: x[0]) def _parse_blame_output(blame_output: str) -> dict[str, int]: """ Parse git blame --line-porcelain output, returning a year -> line count mapping. - Extracting this logic reduces nesting and properly handles Git's porcelain format, - where 'author-time' is only printed once per commit block, but actual code lines - begin with a tab character. :param blame_output: The raw output from git blame --line-porcelain :return: A dictionary mapping years to the number of lines changed in that year @@ -115,13 +105,11 @@ def _parse_blame_output(blame_output: str) -> dict[str, int]: for line in blame_output.splitlines(): if line.startswith("\t"): - # This is an actual line of code. Attribute it to the year of the current commit. if current_commit and current_commit in commit_to_year: year = commit_to_year[current_commit] file_distribution[year] += 1 else: parts = line.split(" ") - # Check if the line starts with a 40-char (SHA-1) or 64-char (SHA-256) commit hash if len(parts[0]) in (40, 64): current_commit = parts[0] elif parts[0] == "author-time": @@ -148,11 +136,9 @@ def _blame_single_file(repo_path: str, file: str) -> dict[str, int]: ) return _parse_blame_output(blame_output) except RuntimeError: - # Skip files that git blame cannot process (like binaries) return {} -@timer def analyze_snapshots(repo_path: str, commit_hash: str) -> dict[str, int]: """ Analyze the snapshots collected from the repository. @@ -169,10 +155,10 @@ def analyze_snapshots(repo_path: str, commit_hash: str) -> dict[str, int]: valid_files = [f for f in files if os.path.isfile(os.path.join(repo_path, f))] - # Use ThreadPoolExecutor to bypass the O(N) sequential subprocess bottleneck. - # Subprocess calls release the GIL, making threading highly effective here. - max_threads = 20 - with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as executor: + max_workers = int( + os.environ.get("BLAME_WORKERS", min(20, (os.cpu_count() or 1) * 2)) + ) + with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: future_to_file = { executor.submit(_blame_single_file, repo_path, file): file for file in valid_files @@ -198,17 +184,26 @@ def load_existing_state(json_fname: str) -> list[dict]: with open(json_fname, "r", encoding="utf-8") as f: return json.load(f) except json.JSONDecodeError: - print(f"Warning: {json_fname} is corrupted, Start fresh.") + logger.warning("%s is corrupted, starting fresh.", json_fname) return [] return [] -# TODO: Make the main function to tie everything together -@timer +def _atomic_write_json(json_path: str, data: list[dict]) -> None: + """Write JSON data atomically to prevent corruption on crash.""" + tmp_path = json_path + ".tmp" + with open(tmp_path, "w", encoding="utf-8") as f: + json.dump(data, f, indent=4) + os.replace(tmp_path, json_path) + + def process_repository(repo_slug: str, data_dir: str) -> None: """ Orchestrate the extraction of Ship of Theseus code persistence data - using an incremental load strategy by just processing the delta + using an incremental load strategy by just processing the delta. + + Processes year-by-year and writes to disk after each year completes + to prevent data loss on crash. :param repo_slug: The GitHub repository identifier (e.g., 'facebook/react'). :param data_dir: Path where the resulting JSON data will be saved. @@ -216,23 +211,14 @@ def process_repository(repo_slug: str, data_dir: str) -> None: repo_name = repo_slug.split("/")[-1] temp_repo_path = f"./temp_workdir_{repo_name}" output_json_path = os.path.join(data_dir, f"{repo_name}_data.json") - # System design thinking is that we don't want to load existing state and recalculate redundantly - # - # We clone the repository dynamically just to read it. By pulling the codebase - # ourselves instead of relying on GitHub Actions checkout steps, we can iterate - # through 10, 50, or 100 repositories entirely within Python. - # - # Let's say we have a 10-year old repository. Running git blame on every file for every single month - # of it's 120 month long history would take hours and it would blow past the GitHub Action's free tier limit. - # By loading the existing state and only processing the delta, we can avoid this and run much faster. - # - # This reduces a 30-minute monthly compute job down to about 5 seconds, - # ensuring that I don't have to pay for keeping this project alive lmao. + try: if not os.path.exists(temp_repo_path): clone_repository(repo_slug, temp_repo_path) else: - print(f"Repository {repo_name} already exists locally. Fetching latest...") + logger.info( + "Repository %s already exists locally. Fetching latest...", repo_name + ) _run_command(["git", "fetch", "--all"], cwd=temp_repo_path) for branch in ["main", "master"]: try: @@ -245,65 +231,141 @@ def process_repository(repo_slug: str, data_dir: str) -> None: historical_data = load_existing_state(output_json_path) processed_periods = set(item["snapshot_date"] for item in historical_data) - all_snapshots = get_monthly_snapshots(temp_repo_path) - new_data = [] + all_snapshots = get_snapshots(temp_repo_path) + new_snapshots = [ + (period, commit) + for period, commit in all_snapshots + if period not in processed_periods + ] - for period, commit in all_snapshots: - if period in processed_periods: - # We already know what the repository looked like in this month. Skip it. - continue - - print( - f"[{repo_name}] Calculating DELTA for new period: {period} (Commit: {commit[:7]})..." - ) - distribution = analyze_snapshots(temp_repo_path, commit) - - new_data.append( - { - "snapshot_date": period, - "total_lines": sum(distribution.values()), - "composition": distribution, - } + if not new_snapshots: + logger.info( + "[%s] No new periods to process. Data is already up to date!", repo_name ) + return - if not new_data: - print( - f"[{repo_name}] No new months to process. Data is already up to date!" - ) - else: - final_dataset = historical_data + new_data - final_dataset.sort(key=lambda x: x["snapshot_date"]) + logger.info( + "[%s] Processing %d new snapshots with hybrid resolution (quarterly pre-2025, monthly 2025+)", + repo_name, + len(new_snapshots), + ) + + snapshots_by_year = groupby(new_snapshots, key=lambda x: x[0][:4]) + total_new_data = [] - with open(output_json_path, "w", encoding="utf-8") as f: - json.dump(final_dataset, f, indent=4) + for year, year_snapshots in snapshots_by_year: + year_snapshots_list = list(year_snapshots) + year_data = [] + year_start = time.perf_counter() - print( - f"[{repo_name}] Delta analysis complete. Appended {len(new_data)} new months." + logger.info( + "[%s] Processing year %s: %d snapshots", + repo_name, + year, + len(year_snapshots_list), + ) + + for idx, (period, commit) in enumerate(year_snapshots_list, 1): + logger.info( + "[%s] [%s] Processing %s (%d/%d) - Commit: %s", + repo_name, + year, + period, + idx, + len(year_snapshots_list), + commit[:7], + ) + + snapshot_start = time.perf_counter() + distribution = analyze_snapshots(temp_repo_path, commit) + snapshot_elapsed = time.perf_counter() - snapshot_start + + logger.info( + "[%s] [%s] Completed %s in %.2f seconds (%d total lines)", + repo_name, + year, + period, + snapshot_elapsed, + sum(distribution.values()), + ) + + year_data.append( + { + "snapshot_date": period, + "total_lines": sum(distribution.values()), + "composition": distribution, + } + ) + + total_new_data.extend(year_data) + year_elapsed = time.perf_counter() - year_start + + final_dataset = historical_data + total_new_data + final_dataset.sort(key=lambda x: x["snapshot_date"]) + _atomic_write_json(output_json_path, final_dataset) + + logger.info( + "[%s] Completed year %s in %.2f seconds. Wrote %d total snapshots to disk.", + repo_name, + year, + year_elapsed, + len(final_dataset), ) finally: - # Polite cleanup: Remove the gigantic source code folders we downloaded. - # We only want to keep the JSON data! if os.path.exists(temp_repo_path): - print(f"Cleaning up temporary directory: {temp_repo_path}") - # Note: Windows might need special handling for git files, but this works on Linux/Mac (GitHub Actions) - shutil.rmtree(temp_repo_path, ignore_errors=True) + logger.info("Cleaning up temporary directory: %s", temp_repo_path) + time.sleep(1) + + def handle_remove_readonly(func, path, exc_info): + """Handle permission errors on Windows/Unix.""" + try: + os.chmod(path, 0o777) + func(path) + except Exception: + pass + + for attempt in range(3): + try: + shutil.rmtree(temp_repo_path, onerror=handle_remove_readonly) + break + except Exception as e: + if attempt < 2: + time.sleep(1) + logger.warning("Cleanup attempt %d failed: %s", attempt + 1, e) + else: + logger.error( + "Failed to clean up temporary directory after 3 attempts: %s", + e, + ) if __name__ == "__main__": + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + ) + DATA_OUTPUT_DIR = "./data" os.makedirs(DATA_OUTPUT_DIR, exist_ok=True) - # The Case Studies: Start with this one to benchmark. - TARGETS = [ - "anthropics/claude-code", - ] + TARGETS = ["anthropics/claude-code", "facebook/react", "langchain-ai/langchain"] overall_start = time.perf_counter() - for target in TARGETS: - print(f"\n{'=' * 50}\nStarting analysis pipeline for: {target}\n{'=' * 50}") - process_repository(target, DATA_OUTPUT_DIR) - overall_end = time.perf_counter() - print( - f"\n{'=' * 50}\nTOTAL PIPELINE EXECUTION TIME: {overall_end - overall_start:.2f} seconds\n{'=' * 50}" - ) + logger.info("Starting analysis pipeline for %d repositories", len(TARGETS)) + + with concurrent.futures.ThreadPoolExecutor(max_workers=len(TARGETS)) as executor: + futures = { + executor.submit(process_repository, target, DATA_OUTPUT_DIR): target + for target in TARGETS + } + for future in concurrent.futures.as_completed(futures): + target = futures[future] + try: + future.result() + except Exception as e: + logger.error("Failed to process %s: %s", target, e) + + overall_elapsed = time.perf_counter() - overall_start + logger.info("TOTAL PIPELINE EXECUTION TIME: %.2f seconds", overall_elapsed) From 49c475b40c806c1d0ae61ac75fb1c79629105358 Mon Sep 17 00:00:00 2001 From: Asif Sayyed Date: Sun, 5 Apr 2026 10:51:35 +0530 Subject: [PATCH 3/4] #10 update the dataset --- data/langchain_data.json | 216 +++++++++++++ data/react_data.json | 651 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 867 insertions(+) diff --git a/data/langchain_data.json b/data/langchain_data.json index 8bc012e..d247594 100644 --- a/data/langchain_data.json +++ b/data/langchain_data.json @@ -40,5 +40,221 @@ "2022": 14704, "2026": 4 } + }, + { + "snapshot_date": "2024-03", + "total_lines": 1313210, + "composition": { + "2024": 357436, + "2023": 944924, + "2022": 10842, + "2026": 8 + } + }, + { + "snapshot_date": "2024-06", + "total_lines": 1479793, + "composition": { + "2023": 826677, + "2024": 645423, + "2022": 7685, + "2026": 8 + } + }, + { + "snapshot_date": "2024-09", + "total_lines": 1358307, + "composition": { + "2023": 686097, + "2024": 664907, + "2022": 7295, + "2026": 8 + } + }, + { + "snapshot_date": "2024-12", + "total_lines": 1236872, + "composition": { + "2024": 683157, + "2023": 546569, + "2022": 7138, + "2026": 8 + } + }, + { + "snapshot_date": "2025-01", + "total_lines": 1271958, + "composition": { + "2024": 720654, + "2023": 544169, + "2022": 7122, + "2026": 8, + "2025": 5 + } + }, + { + "snapshot_date": "2025-02", + "total_lines": 1314181, + "composition": { + "2023": 542683, + "2024": 707703, + "2025": 56681, + "2022": 7106, + "2026": 8 + } + }, + { + "snapshot_date": "2025-03", + "total_lines": 1321236, + "composition": { + "2023": 533418, + "2024": 657044, + "2025": 124237, + "2022": 6529, + "2026": 8 + } + }, + { + "snapshot_date": "2025-04", + "total_lines": 1258419, + "composition": { + "2023": 491914, + "2024": 630484, + "2025": 129671, + "2022": 6346, + "2026": 4 + } + }, + { + "snapshot_date": "2025-05", + "total_lines": 914619, + "composition": { + "2024": 448680, + "2025": 144491, + "2023": 315772, + "2022": 5672, + "2026": 4 + } + }, + { + "snapshot_date": "2025-06", + "total_lines": 921892, + "composition": { + "2023": 313764, + "2024": 444312, + "2025": 158141, + "2022": 5671, + "2026": 4 + } + }, + { + "snapshot_date": "2025-07", + "total_lines": 931014, + "composition": { + "2023": 313208, + "2024": 441747, + "2025": 170384, + "2022": 5671, + "2026": 4 + } + }, + { + "snapshot_date": "2025-08", + "total_lines": 953350, + "composition": { + "2025": 203640, + "2023": 308982, + "2024": 435295, + "2022": 5429, + "2026": 4 + } + }, + { + "snapshot_date": "2025-09", + "total_lines": 970186, + "composition": { + "2025": 222619, + "2023": 308625, + "2024": 433511, + "2022": 5427, + "2026": 4 + } + }, + { + "snapshot_date": "2025-10", + "total_lines": 994939, + "composition": { + "2023": 306859, + "2025": 253209, + "2024": 429458, + "2022": 5409, + "2026": 4 + } + }, + { + "snapshot_date": "2025-11", + "total_lines": 438806, + "composition": { + "2025": 213274, + "2023": 83597, + "2024": 138301, + "2022": 3632, + "2026": 2 + } + }, + { + "snapshot_date": "2025-12", + "total_lines": 437864, + "composition": { + "2025": 213376, + "2023": 83259, + "2024": 137738, + "2022": 3489, + "2026": 2 + } + }, + { + "snapshot_date": "2026-01", + "total_lines": 445362, + "composition": { + "2025": 221792, + "2023": 82989, + "2024": 136828, + "2022": 3485, + "2026": 268 + } + }, + { + "snapshot_date": "2026-02", + "total_lines": 433246, + "composition": { + "2024": 128469, + "2023": 81140, + "2025": 207607, + "2026": 12545, + "2022": 3485 + } + }, + { + "snapshot_date": "2026-03", + "total_lines": 446056, + "composition": { + "2023": 81121, + "2025": 198803, + "2024": 126788, + "2026": 35860, + "2022": 3484 + } + }, + { + "snapshot_date": "2026-04", + "total_lines": 454088, + "composition": { + "2025": 195106, + "2023": 81079, + "2024": 126636, + "2026": 47792, + "2022": 3475 + } } ] \ No newline at end of file diff --git a/data/react_data.json b/data/react_data.json index 29a2864..f8dc894 100644 --- a/data/react_data.json +++ b/data/react_data.json @@ -127,5 +127,656 @@ "2014": 53615, "2016": 165406 } + }, + { + "snapshot_date": "2017-03", + "total_lines": 418371, + "composition": { + "2016": 166245, + "2017": 18369, + "2014": 53388, + "2015": 93195, + "2013": 87174 + } + }, + { + "snapshot_date": "2017-06", + "total_lines": 428321, + "composition": { + "2016": 114802, + "2017": 97714, + "2014": 49195, + "2013": 80876, + "2015": 85734 + } + }, + { + "snapshot_date": "2017-09", + "total_lines": 437769, + "composition": { + "2016": 100286, + "2017": 137649, + "2014": 44904, + "2013": 73677, + "2015": 81253 + } + }, + { + "snapshot_date": "2017-12", + "total_lines": 185441, + "composition": { + "2014": 3296, + "2016": 37649, + "2017": 130122, + "2015": 9732, + "2013": 4642 + } + }, + { + "snapshot_date": "2018-03", + "total_lines": 206585, + "composition": { + "2017": 133361, + "2016": 31594, + "2018": 25647, + "2015": 8897, + "2013": 4240, + "2014": 2846 + } + }, + { + "snapshot_date": "2018-06", + "total_lines": 218276, + "composition": { + "2013": 4178, + "2015": 8798, + "2014": 2813, + "2016": 28955, + "2017": 125888, + "2018": 47644 + } + }, + { + "snapshot_date": "2018-09", + "total_lines": 246988, + "composition": { + "2017": 123086, + "2015": 8701, + "2016": 27872, + "2018": 80461, + "2014": 2783, + "2013": 4085 + } + }, + { + "snapshot_date": "2018-12", + "total_lines": 271742, + "composition": { + "2017": 119475, + "2016": 27687, + "2013": 4028, + "2018": 109144, + "2015": 8663, + "2014": 2745 + } + }, + { + "snapshot_date": "2019-03", + "total_lines": 285663, + "composition": { + "2018": 105961, + "2014": 2745, + "2017": 119065, + "2015": 8661, + "2016": 27318, + "2013": 4026, + "2019": 17887 + } + }, + { + "snapshot_date": "2019-06", + "total_lines": 47716, + "composition": { + "2019": 47716 + } + }, + { + "snapshot_date": "2019-09", + "total_lines": 388817, + "composition": { + "2019": 144487, + "2013": 4006, + "2015": 8643, + "2014": 2733, + "2017": 116957, + "2018": 85535, + "2016": 26456 + } + }, + { + "snapshot_date": "2019-12", + "total_lines": 413993, + "composition": { + "2015": 8633, + "2017": 115880, + "2016": 26323, + "2014": 2731, + "2013": 4006, + "2019": 173378, + "2018": 83042 + } + }, + { + "snapshot_date": "2020-03", + "total_lines": 418095, + "composition": { + "2014": 2628, + "2020": 25299, + "2019": 161223, + "2018": 77097, + "2016": 25834, + "2017": 113668, + "2013": 3813, + "2015": 8533 + } + }, + { + "snapshot_date": "2020-06", + "total_lines": 484835, + "composition": { + "2020": 107514, + "2017": 111700, + "2019": 151449, + "2018": 74527, + "2013": 3705, + "2015": 8475, + "2016": 24830, + "2014": 2635 + } + }, + { + "snapshot_date": "2020-09", + "total_lines": 484772, + "composition": { + "2019": 134359, + "2016": 24617, + "2018": 71821, + "2020": 130649, + "2017": 109402, + "2015": 8375, + "2014": 2117, + "2013": 3432 + } + }, + { + "snapshot_date": "2020-12", + "total_lines": 512569, + "composition": { + "2015": 8365, + "2014": 2109, + "2016": 24570, + "2013": 3381, + "2019": 132051, + "2020": 161173, + "2017": 109318, + "2018": 71602 + } + }, + { + "snapshot_date": "2021-03", + "total_lines": 529104, + "composition": { + "2017": 106680, + "2019": 123247, + "2021": 14784, + "2014": 2109, + "2018": 71013, + "2020": 174962, + "2013": 3381, + "2015": 8365, + "2016": 24563 + } + }, + { + "snapshot_date": "2021-06", + "total_lines": 541322, + "composition": { + "2013": 3366, + "2018": 64817, + "2020": 164228, + "2021": 51873, + "2017": 104862, + "2015": 8362, + "2019": 117274, + "2014": 2097, + "2016": 24443 + } + }, + { + "snapshot_date": "2021-09", + "total_lines": 579893, + "composition": { + "2019": 114891, + "2021": 100659, + "2020": 157591, + "2015": 8299, + "2014": 2055, + "2016": 24342, + "2013": 3366, + "2017": 104459, + "2018": 64231 + } + }, + { + "snapshot_date": "2021-12", + "total_lines": 602363, + "composition": { + "2019": 113210, + "2021": 132852, + "2015": 8119, + "2017": 103777, + "2016": 24244, + "2014": 2047, + "2013": 3347, + "2020": 151036, + "2018": 63731 + } + }, + { + "snapshot_date": "2022-03", + "total_lines": 618762, + "composition": { + "2020": 149490, + "2021": 132083, + "2019": 111676, + "2017": 103610, + "2022": 20641, + "2018": 63553, + "2015": 8093, + "2016": 24233, + "2013": 3347, + "2014": 2036 + } + }, + { + "snapshot_date": "2022-06", + "total_lines": 626915, + "composition": { + "2021": 128680, + "2020": 146266, + "2014": 2036, + "2013": 3343, + "2015": 8081, + "2019": 105978, + "2017": 103567, + "2022": 42376, + "2018": 62371, + "2016": 24217 + } + }, + { + "snapshot_date": "2022-09", + "total_lines": 634645, + "composition": { + "2021": 125186, + "2017": 102835, + "2019": 101050, + "2022": 65451, + "2020": 141043, + "2018": 61418, + "2015": 8069, + "2014": 2036, + "2016": 24214, + "2013": 3343 + } + }, + { + "snapshot_date": "2022-12", + "total_lines": 655431, + "composition": { + "2019": 97224, + "2021": 122925, + "2020": 136881, + "2017": 101550, + "2015": 8038, + "2016": 24156, + "2018": 59931, + "2022": 99388, + "2013": 3308, + "2014": 2030 + } + }, + { + "snapshot_date": "2023-03", + "total_lines": 611442, + "composition": { + "2019": 95301, + "2021": 113457, + "2022": 78827, + "2023": 25935, + "2020": 102538, + "2017": 99498, + "2015": 7993, + "2016": 23711, + "2013": 3258, + "2014": 2005, + "2018": 58919 + } + }, + { + "snapshot_date": "2023-06", + "total_lines": 123600, + "composition": { + "2022": 17170, + "2021": 61003, + "2023": 45427 + } + }, + { + "snapshot_date": "2023-09", + "total_lines": 344041, + "composition": { + "2023": 278928, + "2021": 55707, + "2022": 9406 + } + }, + { + "snapshot_date": "2023-12", + "total_lines": 384398, + "composition": { + "2023": 315798, + "2021": 58578, + "2022": 10022 + } + }, + { + "snapshot_date": "2024-03", + "total_lines": 74640, + "composition": { + "2021": 74640 + } + }, + { + "snapshot_date": "2024-06", + "total_lines": 1039548, + "composition": { + "2024": 131231, + "2023": 389106, + "2022": 70231, + "2019": 76856, + "2020": 86835, + "2021": 110365, + "2013": 2827, + "2015": 6468, + "2014": 1438, + "2017": 91522, + "2018": 51463, + "2016": 21206 + } + }, + { + "snapshot_date": "2024-09", + "total_lines": 1071649, + "composition": { + "2024": 186038, + "2020": 85181, + "2017": 91329, + "2015": 6450, + "2016": 21094, + "2019": 74478, + "2021": 107827, + "2014": 1434, + "2023": 376610, + "2013": 2827, + "2018": 50963, + "2022": 67418 + } + }, + { + "snapshot_date": "2024-12", + "total_lines": 1096527, + "composition": { + "2024": 222258, + "2023": 371161, + "2018": 50452, + "2021": 106543, + "2017": 91147, + "2019": 73494, + "2022": 65464, + "2013": 2798, + "2016": 21016, + "2015": 6434, + "2020": 84332, + "2014": 1428 + } + }, + { + "snapshot_date": "2025-01", + "total_lines": 1104789, + "composition": { + "2019": 72870, + "2024": 235398, + "2023": 369720, + "2020": 83431, + "2021": 105680, + "2022": 64957, + "2013": 2798, + "2018": 49935, + "2017": 91080, + "2015": 6409, + "2016": 21010, + "2014": 1424, + "2025": 77 + } + }, + { + "snapshot_date": "2025-02", + "total_lines": 901529, + "composition": { + "2021": 105294, + "2019": 72002, + "2020": 81905, + "2013": 2794, + "2015": 6344, + "2014": 1412, + "2024": 228979, + "2017": 90754, + "2022": 64653, + "2023": 146990, + "2016": 20945, + "2018": 48938, + "2025": 30519 + } + }, + { + "snapshot_date": "2025-03", + "total_lines": 909351, + "composition": { + "2024": 226806, + "2020": 81740, + "2025": 43402, + "2017": 90722, + "2015": 6341, + "2016": 20945, + "2019": 70686, + "2021": 105234, + "2023": 145909, + "2014": 1412, + "2013": 2793, + "2022": 64519, + "2018": 48842 + } + }, + { + "snapshot_date": "2025-04", + "total_lines": 934567, + "composition": { + "2023": 144980, + "2021": 105085, + "2024": 221556, + "2025": 76221, + "2019": 70526, + "2020": 81491, + "2017": 90713, + "2022": 63742, + "2014": 1412, + "2013": 2793, + "2016": 20943, + "2015": 6341, + "2018": 48764 + } + }, + { + "snapshot_date": "2025-05", + "total_lines": 949315, + "composition": { + "2024": 220417, + "2013": 2793, + "2016": 20943, + "2022": 63508, + "2015": 6341, + "2023": 143478, + "2018": 48758, + "2017": 90707, + "2019": 70454, + "2020": 81428, + "2014": 1412, + "2021": 103829, + "2025": 95247 + } + }, + { + "snapshot_date": "2025-06", + "total_lines": 963256, + "composition": { + "2024": 218219, + "2025": 112502, + "2023": 142802, + "2021": 103776, + "2020": 81289, + "2013": 2793, + "2018": 48737, + "2017": 90702, + "2015": 6341, + "2019": 70363, + "2022": 63378, + "2016": 20942, + "2014": 1412 + } + }, + { + "snapshot_date": "2025-07", + "total_lines": 984841, + "composition": { + "2024": 215841, + "2025": 137355, + "2019": 70222, + "2021": 103736, + "2023": 142387, + "2020": 81261, + "2017": 90701, + "2015": 6341, + "2016": 20939, + "2014": 1412, + "2013": 2793, + "2018": 48694, + "2022": 63159 + } + }, + { + "snapshot_date": "2025-08", + "total_lines": 995729, + "composition": { + "2024": 212777, + "2020": 81060, + "2025": 153793, + "2019": 69618, + "2021": 103392, + "2023": 141860, + "2014": 1412, + "2017": 90690, + "2015": 6334, + "2016": 20929, + "2013": 2793, + "2018": 48331, + "2022": 62740 + } + }, + { + "snapshot_date": "2025-09", + "total_lines": 1021019, + "composition": { + "2024": 209350, + "2020": 80847, + "2025": 185175, + "2019": 69500, + "2021": 103253, + "2023": 140410, + "2013": 2793, + "2015": 6334, + "2014": 1412, + "2017": 90685, + "2016": 20929, + "2018": 48318, + "2022": 62013 + } + }, + { + "snapshot_date": "2025-10", + "total_lines": 1033401, + "composition": { + "2017": 90684, + "2015": 6334, + "2016": 20929, + "2019": 69355, + "2024": 208263, + "2021": 103074, + "2020": 80758, + "2025": 199752, + "2023": 139982, + "2014": 1412, + "2018": 48312, + "2022": 61753, + "2013": 2793 + } + }, + { + "snapshot_date": "2025-11", + "total_lines": 1041913, + "composition": { + "2024": 207430, + "2025": 209692, + "2019": 69272, + "2021": 103036, + "2020": 80677, + "2023": 139651, + "2014": 1412, + "2017": 90684, + "2015": 6334, + "2016": 20929, + "2018": 48310, + "2022": 61693, + "2013": 2793 + } + }, + { + "snapshot_date": "2025-12", + "total_lines": 1047865, + "composition": { + "2020": 80599, + "2024": 205774, + "2025": 220530, + "2021": 103015, + "2023": 136868, + "2019": 69017, + "2014": 1412, + "2013": 2793, + "2015": 6334, + "2017": 90652, + "2016": 20915, + "2018": 48277, + "2022": 61679 + } } ] \ No newline at end of file From 78747492a5e748a3dfce14dbe76ebf2d4ff2b5b9 Mon Sep 17 00:00:00 2001 From: Asif Sayyed Date: Sun, 5 Apr 2026 12:10:28 +0530 Subject: [PATCH 4/4] #3 finalised the poc interface --- .gitignore | 5 +- app.js | 364 ++++++++++++++++++++++++---------- data/manifest.json | 10 + index.html | 54 ++++- scripts/analyse_repository.py | 42 +++- style.css | 167 +++++++++++++--- 6 files changed, 492 insertions(+), 150 deletions(-) diff --git a/.gitignore b/.gitignore index b7faf40..d27d551 100644 --- a/.gitignore +++ b/.gitignore @@ -182,9 +182,9 @@ cython_debug/ .abstra/ # Visual Studio Code -# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore +# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore -# and can be added to the global gitignore or merged into this file. However, if you prefer, +# and can be added to the global gitignore or merged into this file. However, if you prefer, # you could uncomment the following to ignore the entire vscode folder # .vscode/ @@ -200,6 +200,7 @@ cython_debug/ # refer to https://docs.cursor.com/context/ignore-files .cursorignore .cursorindexingignore +.agent # Marimo marimo/_static/ diff --git a/app.js b/app.js index eeb994f..d15d009 100644 --- a/app.js +++ b/app.js @@ -1,6 +1,7 @@ /** * Ship of Theseus - Code Visualizer * Core Logic (Vanilla JS + SVG) + * Data is loaded from data/*.json files - run with a local server */ class TheseusVisualizer { @@ -10,36 +11,71 @@ class TheseusVisualizer { this.currentRepo = null; this.canvas = document.getElementById('main-chart'); this.tooltip = document.getElementById('tooltip'); + this.legend = document.getElementById('chart-legend'); this.repoSelector = document.getElementById('repo-selector'); this.repoDescription = document.getElementById('repo-description'); + this.vizToggle = document.getElementById('viz-mode-toggle'); + this.scaleToggle = document.getElementById('scale-toggle'); this.loadingState = document.getElementById('chart-loading'); - - this.margin = { top: 40, right: 20, bottom: 60, left: 60 }; + + this.margin = { top: 10, right: 0, bottom: 30, left: 50 }; this.years = []; this.points = []; - + this.vizMode = 'chronological'; // 'chronological' | 'identity' + this.yScaleMode = 'linear'; // 'linear' | 'log' + this.init(); } async init() { try { const response = await fetch('data/manifest.json'); + if (!response.ok) { + throw new Error(`HTTP ${response.status}`); + } let data = await response.json(); - - // Normalize manifest to array this.manifest = Array.isArray(data) ? data : [data]; - + this.renderSelectors(); + this.setupModeToggle(); + this.setupScaleToggle(); + if (this.manifest.length > 0) { this.loadRepo(this.manifest[0].name); } } catch (err) { - this.showError("Failed to load repository manifest."); + this.showError("Failed to load repository manifest: " + err.message); } window.addEventListener('resize', () => this.debouncedRender()); } + setupModeToggle() { + this.vizToggle.addEventListener('click', (e) => { + const btn = e.target.closest('.mode-btn'); + if (!btn || btn.classList.contains('active')) return; + + document.querySelectorAll('.mode-btn').forEach(b => b.classList.remove('active')); + btn.classList.add('active'); + + this.vizMode = btn.dataset.mode; + if (this.currentData) this.renderChart(); + }); + } + + setupScaleToggle() { + this.scaleToggle.addEventListener('click', (e) => { + const btn = e.target.closest('.scale-btn'); + if (!btn || btn.classList.contains('active')) return; + + document.querySelectorAll('.scale-btn').forEach(b => b.classList.remove('active')); + btn.classList.add('active'); + + this.yScaleMode = btn.dataset.scale; + if (this.currentData) this.renderChart(); + }); + } + renderSelectors() { this.repoSelector.innerHTML = ''; this.manifest.forEach(repo => { @@ -53,28 +89,30 @@ class TheseusVisualizer { } async loadRepo(repoName) { - if (this.currentRepo === repoName) return; - - this.currentRepo = repoName; - this.updateActiveBtn(repoName); + if (this.currentRepo === repoName && this.currentData) return; + this.showLoading(true); this.hideError(); try { const repoInfo = this.manifest.find(r => r.name === repoName); - const fileName = repoInfo.file; this.repoDescription.textContent = repoInfo.description || ''; - - const response = await fetch(`data/${fileName}`); + + const response = await fetch(`data/${repoInfo.file}`); + if (!response.ok) throw new Error(`HTTP ${response.status}`); this.currentData = await response.json(); - + + this.currentRepo = repoName; + this.updateActiveBtn(repoName); + this.processData(); this.renderChart(); this.updateInsights(); - this.showLoading(false); } catch (err) { console.error(err); this.showError(`Failed to load data for ${repoName}`); + } finally { + this.showLoading(false); } } @@ -100,7 +138,7 @@ class TheseusVisualizer { composition: d.composition, stack: {} }; - + let cumulative = 0; this.years.forEach(year => { const value = d.composition[year] || 0; @@ -111,135 +149,253 @@ class TheseusVisualizer { }; cumulative += value; }); - + return point; }); } renderChart() { - const { width, height } = this.canvas.getBoundingClientRect(); - this.canvas.innerHTML = ''; // Clear previous + const width = this.canvas.clientWidth; + const height = this.canvas.clientHeight; + if (!width || !height) return; + + this.canvas.innerHTML = ''; // Define Gradients const defs = this.createSVGElement('defs'); - this.years.forEach((year, i) => { - const hue = (180 + i * 40) % 360; // Spread colors starting from Cyan - const grad = this.createSVGElement('linearGradient', { - id: `grad-${year}`, - x1: '0%', y1: '0%', x2: '0%', y2: '100%' + if (this.vizMode === 'identity') { + const gradOriginal = this.createSVGElement('linearGradient', { id: `grad-original`, x1: '0%', y1: '0%', x2: '0%', y2: '100%' }); + gradOriginal.appendChild(this.createSVGElement('stop', { offset: '0%', 'stop-color': `#3bc7c7`, 'stop-opacity': 0.8 })); + gradOriginal.appendChild(this.createSVGElement('stop', { offset: '100%', 'stop-color': `#3bc7c7`, 'stop-opacity': 0.1 })); + defs.appendChild(gradOriginal); + + const gradRefactored = this.createSVGElement('linearGradient', { id: `grad-refactored`, x1: '0%', y1: '0%', x2: '0%', y2: '100%' }); + gradRefactored.appendChild(this.createSVGElement('stop', { offset: '0%', 'stop-color': `#f0a33b`, 'stop-opacity': 0.8 })); + gradRefactored.appendChild(this.createSVGElement('stop', { offset: '100%', 'stop-color': `#f0a33b`, 'stop-opacity': 0.1 })); + defs.appendChild(gradRefactored); + } else { + this.years.forEach((year, i) => { + const hue = (180 + i * 40) % 360; + const grad = this.createSVGElement('linearGradient', { id: `grad-${year}`, x1: '0%', y1: '0%', x2: '0%', y2: '100%' }); + grad.appendChild(this.createSVGElement('stop', { offset: '0%', 'stop-color': `hsl(${hue}, 70%, 55%)`, 'stop-opacity': 0.8 })); + grad.appendChild(this.createSVGElement('stop', { offset: '100%', 'stop-color': `hsl(${hue}, 70%, 55%)`, 'stop-opacity': 0.1 })); + defs.appendChild(grad); }); - grad.appendChild(this.createSVGElement('stop', { offset: '0%', 'stop-color': `hsl(${hue}, 70%, 55%)`, 'stop-opacity': 0.8 })); - grad.appendChild(this.createSVGElement('stop', { offset: '100%', 'stop-color': `hsl(${hue}, 70%, 55%)`, 'stop-opacity': 0.1 })); - defs.appendChild(grad); - }); + } this.canvas.appendChild(defs); const chartWidth = width - this.margin.left - this.margin.right; const chartHeight = height - this.margin.top - this.margin.bottom; // Scales - const xScale = (i) => (i / (this.points.length - 1)) * chartWidth; + const xDenominator = Math.max(1, this.points.length - 1); + const xScale = (i) => (i / xDenominator) * chartWidth; const maxVal = Math.max(...this.points.map(p => p.total)); - const yScale = (v) => chartHeight - (v / maxVal) * chartHeight; + + let yScale; + if (this.yScaleMode === 'log') { + const minLog = 0; // log10(1) + const maxLog = Math.log10(maxVal + 1); + yScale = (v) => { + const logV = Math.log10(v + 1); + return chartHeight - ((logV - minLog) / (maxLog - minLog)) * chartHeight; + }; + } else { + yScale = maxVal > 0 ? (v) => chartHeight - (v / maxVal) * chartHeight : (v) => chartHeight; + } // Render Areas const group = this.createSVGElement('g', { transform: `translate(${this.margin.left}, ${this.margin.top})` }); - - this.years.forEach(year => { + + this.years.forEach((year, idx) => { const pathData = this.points.map((p, i) => `${xScale(i)},${yScale(p.stack[year].end)}`); const bottomData = this.points.map((p, i) => `${xScale(i)},${yScale(p.stack[year].start)}`).reverse(); - + + let fillUrl; + if (this.vizMode === 'identity') { + fillUrl = `url(#grad-${idx === 0 ? 'original' : 'refactored'})`; + } else { + fillUrl = `url(#grad-${year})`; + } + const areaPath = this.createSVGElement('path', { d: `M${pathData.join(' L')} L${bottomData.join(' L')} Z`, - fill: `url(#grad-${year})`, + fill: fillUrl, class: 'chart-area', 'data-year': year }); - - // Initial state for animation + areaPath.style.opacity = '0'; areaPath.style.transition = 'opacity 1.5s ease-out'; group.appendChild(areaPath); - setTimeout(() => areaPath.style.opacity = '1', 50); }); - // Axes (Subtle) + // Legend + this.renderLegend(); + + // Axes this.renderAxes(group, chartWidth, chartHeight, xScale, yScale, maxVal); - // Interaction Overlay - const overlay = this.createSVGElement('rect', { - width: chartWidth, - height: chartHeight, - fill: 'transparent' - }); - - overlay.onmousemove = (e) => this.handleMouseMove(e, chartWidth, xScale); + // Interaction + const overlay = this.createSVGElement('rect', { width: chartWidth, height: chartHeight, fill: 'transparent' }); + overlay.onmousemove = (e) => { + const svgRect = this.canvas.getBoundingClientRect(); + const mouseX = e.clientX - svgRect.left - this.margin.left; + const index = Math.round((mouseX / chartWidth) * (this.points.length - 1)); + if (index >= 0 && index < this.points.length) { + this.showTooltip(this.points[index], e.clientX - svgRect.left, e.clientY - svgRect.top); + } + }; overlay.onmouseleave = () => this.hideTooltip(); group.appendChild(overlay); this.canvas.appendChild(group); } - renderAxes(group, width, height, xScale, yScale, maxVal) { - // Horizontal Grid Lines - const tickCount = 5; - for (let i = 0; i <= tickCount; i++) { - const val = (i / tickCount) * maxVal; - const y = yScale(val); - - const line = this.createSVGElement('line', { - x1: 0, x2: width, y1: y, y2: y, - stroke: '#374151', 'stroke-dasharray': '3,3', 'stroke-opacity': 0.5 - }); - group.appendChild(line); + renderLegend() { + this.legend.innerHTML = ''; + const items = this.vizMode === 'identity' + ? [{ label: 'Original Code', color: '#3bc7c7' }, { label: 'Refactored', color: '#f0a33b' }] + : this.years.map((y, i) => ({ label: y, color: `hsl(${(180 + i * 40) % 360}, 70%, 55%)` })); + + items.forEach(item => { + const div = document.createElement('div'); + div.className = 'legend-item'; + div.innerHTML = ` + + ${item.label} + `; + this.legend.appendChild(div); + }); + } - const label = this.createSVGElement('text', { - x: -10, y: y + 4, 'text-anchor': 'end', fill: '#6b7280', 'font-size': '10px' - }); - label.textContent = val >= 1000 ? `${(val/1000).toFixed(1)}k` : Math.round(val); - group.appendChild(label); + renderAxes(group, width, height, xScale, yScale, maxVal) { + const formatValue = (v) => { + if (v >= 1000000) return `${(v / 1000000).toFixed(1)}M`; + if (v >= 1000) return `${(v / 1000).toFixed(1)}k`; + return Math.round(v); + }; + + let lastY = -100; + const minGap = 20; + + if (this.yScaleMode === 'log') { + let val = 1; + while (val <= maxVal * 10) { + const y = yScale(Math.min(val, maxVal)); + if (y >= 0 && y <= height && Math.abs(y - lastY) > minGap) { + group.appendChild(this.createSVGElement('line', { x1: 0, x2: width, y1: y, y2: y, stroke: '#374151', 'stroke-dasharray': '3,3', 'stroke-opacity': 0.5 })); + const label = this.createSVGElement('text', { x: -10, y: y + 4, 'text-anchor': 'end', fill: '#6b7280', 'font-size': '10px' }); + label.textContent = formatValue(val); + group.appendChild(label); + lastY = y; + } + val *= 10; + if (val === 10 && maxVal < 1) break; + } + } else { + const tickCount = 5; + for (let i = 0; i <= tickCount; i++) { + const val = (i / tickCount) * maxVal; + const y = yScale(val); + if (Math.abs(y - lastY) > minGap) { + group.appendChild(this.createSVGElement('line', { x1: 0, x2: width, y1: y, y2: y, stroke: '#374151', 'stroke-dasharray': '3,3', 'stroke-opacity': 0.5 })); + const label = this.createSVGElement('text', { x: -10, y: y + 4, 'text-anchor': 'end', fill: '#6b7280', 'font-size': '10px' }); + label.textContent = formatValue(val); + group.appendChild(label); + lastY = y; + } + } } - // X Axis labels (Show every few snapshots) const xStep = Math.max(1, Math.floor(this.points.length / 6)); this.points.forEach((p, i) => { if (i % xStep === 0 || p.date.endsWith('-01')) { - const label = this.createSVGElement('text', { - x: xScale(i), y: height + 25, 'text-anchor': 'middle', fill: '#6b7280', 'font-size': '10px' - }); + const label = this.createSVGElement('text', { x: xScale(i), y: height + 25, 'text-anchor': 'middle', fill: '#6b7280', 'font-size': '10px' }); label.textContent = p.date.endsWith('-01') ? p.date.split('-')[0] : (i % xStep === 0 ? p.date : ''); if (label.textContent) group.appendChild(label); } }); } - handleMouseMove(e, chartWidth, xScale) { + showTooltip(point, x, y) { + this.tooltip.classList.remove('hidden'); + + // Initial placement + let left = x + 15; + let top = y + 15; + + // Get bounds + const tooltipWidth = this.tooltip.offsetWidth; + const tooltipHeight = this.tooltip.offsetHeight; + const containerWidth = document.body.clientWidth; const svgRect = this.canvas.getBoundingClientRect(); - const mouseX = e.clientX - svgRect.left - this.margin.left; - - // Find nearest point - const index = Math.round((mouseX / chartWidth) * (this.points.length - 1)); - if (index >= 0 && index < this.points.length) { - const p = this.points[index]; - this.showTooltip(p, e.clientX, e.clientY); + + // Horizontal flip if too close to right edge + if (svgRect.left + left + tooltipWidth > containerWidth - 20) { + left = x - tooltipWidth - 15; } - } - showTooltip(point, x, y) { - this.tooltip.classList.remove('hidden'); - this.tooltip.style.left = `${x + 15}px`; - this.tooltip.style.top = `${y + 15}px`; + // Vertical flip if too close to bottom (relative to viewport) + if (svgRect.top + top + tooltipHeight > window.innerHeight - 20) { + top = y - tooltipHeight - 15; + } + + this.tooltip.style.left = `${left}px`; + this.tooltip.style.top = `${top}px`; + + const getColor = (idx, year) => { + if (this.vizMode === 'identity') return idx === 0 ? '#3bc7c7' : '#f0a33b'; + const yearIdx = this.years.indexOf(year); + return `hsl(${(180 + yearIdx * 40) % 360}, 70%, 55%)`; + }; let compositionHtml = ''; - this.years.slice().sort((a,b) => b-a).forEach(year => { + if (this.vizMode === 'identity') { + const oldestYear = this.years[0]; + const originalVal = point.composition[oldestYear] || 0; + const refactoredVal = point.total - originalVal; + + compositionHtml += ` +
+
+ + Original (${oldestYear}) +
+
+ ${originalVal.toLocaleString()} + ${((originalVal / point.total) * 100).toFixed(1)}% +
+
+
+
+ + Refactored +
+
+ ${refactoredVal.toLocaleString()} + ${((refactoredVal / point.total) * 100).toFixed(1)}% +
+
+
+ `; + } + + this.years.slice().sort((a, b) => b - a).forEach(year => { const val = point.composition[year] || 0; if (val > 0) { - const pct = ((val / point.total) * 100).toFixed(1); + const yearColor = getColor(null, year); compositionHtml += `
- Code from ${year}: - ${val.toLocaleString()} lines (${pct}%) +
+ + ${year} +
+
+ ${val.toLocaleString()} + ${((val / point.total) * 100).toFixed(1)}% +
`; } @@ -247,10 +403,11 @@ class TheseusVisualizer { this.tooltip.innerHTML = `
Snapshot: ${point.date}
-
- Total Size: +
+ Total Project Size ${point.total.toLocaleString()} lines
+
${compositionHtml} `; } @@ -261,24 +418,21 @@ class TheseusVisualizer { updateInsights() { if (!this.currentData || this.currentData.length === 0) return; - const first = this.currentData[0]; const last = this.currentData[this.currentData.length - 1]; - // % Replaced calculation - const originalYear = Object.keys(first.composition)[0]; - const originalLinesInLast = last.composition[originalYear] || 0; - const originalLinesInFirst = first.total_lines; - - const replaced = ((originalLinesInFirst - originalLinesInLast) / originalLinesInFirst) * 100; - document.getElementById('percent-replaced').textContent = `${Math.min(100, Math.max(0, replaced)).toFixed(1)}%`; - - // Oldest line logic - const oldestYear = this.years[0]; - document.getElementById('oldest-line').textContent = oldestYear; + let originalYear = this.years[0]; + if (!originalYear || first.total_lines === 0) { + document.getElementById('percent-replaced').textContent = '--'; + } else { + const originalLinesInFirst = first.composition[originalYear] || 0; + const originalLinesInLast = last.composition[originalYear] || 0; + const replaced = ((originalLinesInFirst - originalLinesInLast) / originalLinesInFirst) * 100; + document.getElementById('percent-replaced').textContent = `${Math.min(100, Math.max(0, replaced)).toFixed(1)}%`; + } + document.getElementById('oldest-line').textContent = this.years[0]; } - // Utilities createSVGElement(tag, attrs = {}) { const el = document.createElementNS('http://www.w3.org/2000/svg', tag); Object.entries(attrs).forEach(([key, val]) => el.setAttribute(key, val)); @@ -291,12 +445,15 @@ class TheseusVisualizer { showError(msg) { const error = document.getElementById('chart-error'); - error.textContent = msg; - error.classList.remove('hidden'); + if (error) { + error.textContent = msg; + error.classList.remove('hidden'); + } } hideError() { - document.getElementById('chart-error').classList.add('hidden'); + const error = document.getElementById('chart-error'); + if (error) error.classList.add('hidden'); } debouncedRender() { @@ -307,7 +464,6 @@ class TheseusVisualizer { } } -// Spark the void document.addEventListener('DOMContentLoaded', () => { new TheseusVisualizer(); }); diff --git a/data/manifest.json b/data/manifest.json index 48a28df..2a3ad2c 100644 --- a/data/manifest.json +++ b/data/manifest.json @@ -3,5 +3,15 @@ "name": "claude-code", "file": "claude-code_data.json", "description": "Claude's efficient, local-first coding CLI tool." + }, + { + "name": "react", + "file": "react_data.json", + "description": "A JavaScript library for building user interfaces" + }, + { + "name": "langchain", + "file": "langchain_data.json", + "description": "A framework for developing applications powered by large language models" } ] diff --git a/index.html b/index.html index 4fca236..b828545 100644 --- a/index.html +++ b/index.html @@ -1,5 +1,6 @@ + @@ -7,28 +8,54 @@ - + +
- +

The Ship of Theseus

-

Does a codebase remain the same if every line is replaced? A monthly pulse on software entropy.

- -
- +

Does a codebase remain the same if every line is replaced? A monthly pulse on software + entropy.

+ +
+
+ +
+ +
+
+ Mode: +
+ + +
+
+ +
+ Scale: +
+ + +
+
+
+

[ Scanning repository history... ]
- +
+
@@ -54,19 +81,24 @@

Ancestry Analysis

How to read this chart

-

The X-axis represents time moving forward. The Y-axis shows total lines of code. Each colored band represents the surviving code originally written in a specific year.

+

The X-axis represents time moving forward. The Y-axis shows total lines of code. + Each colored band represents the surviving code originally written in a specific year.

The Architecture

-

This data is generated monthly via a static analysis pipeline. It dynamically scans the repository history and outputs a structured JSON file to minimize visual overhead in the browser.

+

This data is generated monthly via a static analysis pipeline. It dynamically scans + the repository history and outputs a structured JSON file to minimize visual overhead in the + browser.

- +
- + + \ No newline at end of file diff --git a/scripts/analyse_repository.py b/scripts/analyse_repository.py index 175ff42..428e599 100644 --- a/scripts/analyse_repository.py +++ b/scripts/analyse_repository.py @@ -9,6 +9,7 @@ import logging import os import shutil +import stat import subprocess import time from collections import defaultdict @@ -75,7 +76,9 @@ def get_snapshots(repo_path: str) -> list[tuple[str, str]]: continue commit_hash, commit_date = line.split("|") period = commit_date[:7] - snapshots[period] = commit_hash + # Keep the first (newest) commit per period + if period not in snapshots: + snapshots[period] = commit_hash quarterly_months = {"03", "06", "09", "12"} filtered_snapshots: dict[str, str] = {} @@ -155,9 +158,18 @@ def analyze_snapshots(repo_path: str, commit_hash: str) -> dict[str, int]: valid_files = [f for f in files if os.path.isfile(os.path.join(repo_path, f))] - max_workers = int( - os.environ.get("BLAME_WORKERS", min(20, (os.cpu_count() or 1) * 2)) - ) + # Safe BLAME_WORKERS parsing with fallback + default_workers = min(20, (os.cpu_count() or 1) * 2) + try: + env_workers = os.environ.get("BLAME_WORKERS") + if env_workers is not None: + parsed = int(env_workers) + max_workers = max(1, min(parsed, 100)) # Clamp between 1-100 + else: + max_workers = default_workers + except ValueError: + max_workers = default_workers + with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: future_to_file = { executor.submit(_blame_single_file, repo_path, file): file @@ -318,12 +330,17 @@ def process_repository(repo_slug: str, data_dir: str) -> None: time.sleep(1) def handle_remove_readonly(func, path, exc_info): - """Handle permission errors on Windows/Unix.""" + """Handle permission errors on Windows/Unix by adding write permission.""" try: - os.chmod(path, 0o777) + current_mode = os.stat(path).st_mode + os.chmod( + path, current_mode | stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH + ) func(path) - except Exception: - pass + except PermissionError as e: + logger.warning("Permission error cleaning up %s: %s", path, e) + except Exception as e: + logger.warning("Error cleaning up %s: %s", path, e) for attempt in range(3): try: @@ -352,10 +369,17 @@ def handle_remove_readonly(func, path, exc_info): TARGETS = ["anthropics/claude-code", "facebook/react", "langchain-ai/langchain"] + # Bound top-level workers by CPU count + max_top_level_workers = min( + len(TARGETS), int(os.getenv("MAX_TOP_LEVEL_WORKERS", os.cpu_count() or 1)) + ) + overall_start = time.perf_counter() logger.info("Starting analysis pipeline for %d repositories", len(TARGETS)) - with concurrent.futures.ThreadPoolExecutor(max_workers=len(TARGETS)) as executor: + with concurrent.futures.ThreadPoolExecutor( + max_workers=max_top_level_workers + ) as executor: futures = { executor.submit(process_repository, target, DATA_OUTPUT_DIR): target for target in TARGETS diff --git a/style.css b/style.css index 4caf743..c5481a8 100644 --- a/style.css +++ b/style.css @@ -40,7 +40,7 @@ body { } .container { - max-width: 1200px; + max-width: 1400px; margin: 0 auto; padding: 4rem 2rem; display: flex; @@ -54,7 +54,7 @@ body { display: flex; flex-direction: column; align-items: center; - gap: 1.5rem; + gap: 2rem; } .title { @@ -87,7 +87,45 @@ body { backdrop-filter: blur(8px); } -.repo-btn { +.controls-bar { + display: flex; + flex-direction: column; + align-items: center; + gap: 1.5rem; + width: 100%; +} + +.settings-row { + display: flex; + gap: 2rem; + flex-wrap: wrap; + justify-content: center; + align-items: center; +} + +.settings-group { + display: flex; + align-items: center; + gap: 0.75rem; +} + +.control-label { + font-size: 0.7rem; + text-transform: uppercase; + letter-spacing: 0.1em; + color: var(--text-secondary); + opacity: 0.8; +} + +.selector-pill.small { + padding: 0.25rem; + gap: 0.25rem; + border-radius: 0.75rem; +} + +.repo-btn, +.mode-btn, +.scale-btn { padding: 0.75rem 1.5rem; border-radius: 9999px; border: none; @@ -100,12 +138,25 @@ body { opacity: 0.6; } -.repo-btn:hover { +.mode-btn, +.scale-btn { + padding: 0.4rem 1rem; + font-size: 0.75rem; + border-radius: 0.5rem; + text-transform: uppercase; + letter-spacing: 0.05em; +} + +.repo-btn:hover, +.mode-btn:hover, +.scale-btn:hover { opacity: 1; color: var(--text-primary); } -.repo-btn.active { +.repo-btn.active, +.mode-btn.active, +.scale-btn.active { background: rgba(255, 255, 255, 0.1); color: var(--text-primary); opacity: 1; @@ -118,7 +169,7 @@ body { font-size: 0.9rem; color: var(--text-secondary); opacity: 0.7; - margin-top: -0.5rem; + margin-top: 0.5rem; } /* Visualization Canvas */ @@ -133,10 +184,10 @@ body { } .visualization-canvas { - min-height: 500px; + min-height: 650px; display: flex; flex-direction: column; - padding: 2rem; + padding: 1.5rem 1rem 0.5rem 1rem; } .loading-state { @@ -163,9 +214,17 @@ body { } @keyframes pulse { - 0% { opacity: 0.3; } - 50% { opacity: 0.8; } - 100% { opacity: 0.3; } + 0% { + opacity: 0.3; + } + + 50% { + opacity: 0.8; + } + + 100% { + opacity: 0.3; + } } /* Chart Canvas */ @@ -173,41 +232,98 @@ body { flex: 1; width: 100%; position: relative; + display: flex; + flex-direction: column; } svg#main-chart { width: 100%; - height: 100%; - min-height: 400px; + flex: 1; + min-height: 550px; } -/* Custom Tooltip */ .custom-tooltip { position: absolute; pointer-events: none; - background: rgba(10, 10, 12, 0.9); + background: rgba(10, 10, 12, 0.95); border: 1px solid var(--glass-border); - padding: 1rem; - border-radius: 0.75rem; + padding: 1.25rem; + border-radius: 1rem; backdrop-filter: blur(12px); z-index: 100; - min-width: 200px; - box-shadow: 0 10px 30px rgba(0,0,0,0.5); + min-width: 280px; + box-shadow: 0 20px 40px rgba(0, 0, 0, 0.7); } .tooltip-header { - font-size: 0.8rem; + font-size: 0.75rem; + text-transform: uppercase; + letter-spacing: 0.1em; color: var(--text-secondary); - margin-bottom: 0.5rem; + margin-bottom: 0.75rem; border-bottom: 1px solid var(--glass-border); - padding-bottom: 0.25rem; + padding-bottom: 0.5rem; } .tooltip-item { display: flex; justify-content: space-between; + align-items: center; font-size: 0.9rem; - margin-top: 0.25rem; + margin-top: 0.4rem; +} + +.label-group { + display: flex; + align-items: center; + gap: 0.5rem; +} + +.color-dot { + width: 6px; + height: 6px; + border-radius: 50%; +} + +.value-group { + display: flex; + align-items: center; + gap: 0.75rem; +} + +.percent-tag { + font-size: 0.75rem; + color: var(--text-secondary); + opacity: 0.7; + font-family: var(--font-mono); +} + +.tooltip-divider { + height: 1px; + background: var(--glass-border); + margin: 0.75rem 0; +} + +/* Legend Styling */ +.chart-legend { + display: flex; + flex-wrap: wrap; + justify-content: center; + gap: 1.5rem; + padding: 1rem; + background: rgba(255, 255, 255, 0.02); + border-bottom: 1px solid var(--glass-border); + margin-bottom: 1rem; +} + +.legend-item { + display: flex; + align-items: center; + gap: 0.5rem; + font-size: 0.75rem; + color: var(--text-secondary); + text-transform: uppercase; + letter-spacing: 0.05em; } /* Insights Cards */ @@ -304,13 +420,16 @@ svg#main-chart { ::-webkit-scrollbar { width: 8px; } + ::-webkit-scrollbar-track { background: var(--bg-void); } + ::-webkit-scrollbar-thumb { background: rgba(255, 255, 255, 0.1); border-radius: 4px; } + ::-webkit-scrollbar-thumb:hover { background: rgba(255, 255, 255, 0.2); -} +} \ No newline at end of file