# Source: reprorusted-python-cli/Makefile (paiml/reprorusted-python-cli, branch main)

# Command-style targets of the core workflow. Declaring them phony guarantees
# that a file or directory with the same name can never make them look up to date.
.PHONY: help install test coverage lint format clean quality citl-train citl-improve citl-export extract-cpython-doctests
.PHONY: format-fix lint-fix clean-all

# Print the catalogue of documented targets, one printf call per section.
# Being the first regular target in the file, this is also what a bare
# `make` runs.
help:
	@printf '%s\n' \
		"reprorusted-python-cli - CITL Training Corpus for Depyler" \
		"" \
		"Setup:" \
		"  make install          - Install dependencies with uv" \
		""
	@printf '%s\n' \
		"Corpus Pipeline (GH-7 through GH-22):" \
		"  make corpus-pipeline  - Run full pipeline (label → augment → report)" \
		"  make corpus-label     - Apply weak supervision labels" \
		"  make corpus-augment   - Generate augmented corpus" \
		"  make corpus-report    - Generate quality report" \
		"  make corpus-analyze   - Analyze zero-success categories" \
		"  make corpus-baseline  - Save current report as baseline" \
		"  make corpus-diff      - Compare current vs baseline" \
		"  make corpus-retranspile - Run depyler on all examples" \
		"  make corpus-refresh   - Full refresh: baseline → retranspile → pipeline → diff" \
		"  make corpus-category-diff - Show which categories changed status" \
		"  make corpus-verify-rust - Verify transpiled Rust compiles" \
		"  make corpus-compile-report - Generate Rust compilation JSON report" \
		"  make corpus-error-analysis - Categorize compilation errors" \
		"  make corpus-record-progress - Record current success rate" \
		"  make corpus-progress-history - Show progress over time" \
		"  make corpus-recommendations - Generate fix recommendations" \
		"  make corpus-dashboard - Show unified status dashboard" \
		"  make corpus-ci - Run CI validation (fails on regression)" \
		"  make corpus-ci-baseline - Save current as CI baseline" \
		"  make corpus-e2e-rate - Measure single-shot compile rate" \
		""
	@printf '%s\n' \
		"Quality Assurance (GH-23 through GH-25):" \
		"  make corpus-golden-analyze - Analyze golden trace candidates" \
		"  make corpus-golden-export  - Export 50 golden traces to JSON" \
		"  make corpus-clippy-check   - Run clippy (soft mode)" \
		"  make corpus-clippy-strict  - Run clippy (blocking mode)" \
		"  make corpus-hitl-sample    - Generate 5% HITL review sample" \
		"  make corpus-hitl-report    - Summarize HITL review findings" \
		""
	@printf '%s\n' \
		"CITL Training:" \
		"  make citl-train       - Train depyler oracle from corpus" \
		"  make citl-improve     - Run CITL improvement loop on all examples" \
		"  make citl-export      - Export training corpus for OIP" \
		""
	@printf '%s\n' \
		"Quality Gates:" \
		"  make quality          - Run all quality gates (format → lint → test)" \
		"  make format           - Check Python code formatting" \
		"  make lint             - Lint Python code" \
		"  make test             - Run all Python tests" \
		""
	@printf '%s\n' \
		"Cleanup:" \
		"  make clean            - Clean Python build artifacts" \
		"  make clean-all        - Clean everything including generated Rust"

# Setup
# Install the project's dependencies by running `uv sync`.
install:
	@printf '%s\n' "Installing dependencies with uv..."
	@uv sync
	@printf '%s\n' "✅ Dependencies installed"

# CITL Training (depyler integration)
# Train the depyler oracle (`depyler oracle train`) with --min-samples 50.
citl-train:
	@printf '%s\n' "Training depyler oracle from corpus..."
	@depyler oracle train --min-samples 50
	@printf '%s\n' "✅ Oracle training complete"

# Run the CITL improvement loop over every example program.
#
# Each examples/example_*/ directory is scanned for *.py files. Files whose
# basename contains "test_" are skipped; everything else is compiled with
# `depyler compile --citl-iterations 3`.
#
# The filter looks at the basename only, so a directory whose name happens to
# contain "test_" no longer causes all of its programs to be skipped. The glob
# already ends in '/', so paths are built without a doubled slash.
#
# Compilation is deliberately best-effort: depyler's stderr is discarded and a
# failing file does not abort the loop.
citl-improve:
	@echo "Running CITL improvement loop..."
	@for example_dir in examples/example_*/; do \
		[ -d "$$example_dir" ] || continue; \
		for py_file in "$$example_dir"*.py; do \
			[ -f "$$py_file" ] || continue; \
			case "$${py_file##*/}" in \
				*test_*) continue ;; \
			esac; \
			echo "Processing $$py_file..."; \
			depyler compile "$$py_file" --citl-iterations 3 2>/dev/null || true; \
		done; \
	done
	@echo "✅ CITL improvement complete"

# Export the examples as an OIP training corpus
# (training_corpus/citl_corpus.jsonl) via `depyler oracle export-oip`.
citl-export:
	@printf '%s\n' "Exporting training corpus for OIP..."
	@mkdir -p training_corpus
	@depyler oracle export-oip \
		--input-dir ./examples --output ./training_corpus/citl_corpus.jsonl \
		--min-confidence 0.80 --include-clippy
	@printf '%s\n' "✅ Corpus exported to training_corpus/citl_corpus.jsonl"

# Corpus Extraction - Reproducible doctest extraction from CPython stdlib
# Prerequisites: alimentar (https://github.com/paiml/alimentar)
# Output: data/corpora/cpython-doctests.parquet
#
# CPYTHON_TMP       - shallow CPython clone, reused between runs
# CPYTHON_LIB_CLEAN - filtered copy of Lib/, deleted and recreated on every run
CPYTHON_TMP := /tmp/cpython
CPYTHON_LIB_CLEAN := /tmp/cpython-lib-clean

# Steps: clone (or fast-forward) CPython, copy Lib/ without the test, idlelib
# and turtledemo entries, then run `alimentar doctest extract`, tagging the
# result with the short commit hash of the checkout.
extract-cpython-doctests:
	@# Refuse to run with an empty path: rm -rf / rsync would otherwise target "/"
	@: $(if $(strip $(CPYTHON_TMP)),,$(error CPYTHON_TMP must not be empty))
	@: $(if $(strip $(CPYTHON_LIB_CLEAN)),,$(error CPYTHON_LIB_CLEAN must not be empty))
	@echo "Extracting CPython stdlib doctests (reproducible)..."
	@echo "Prerequisites: alimentar must be installed and in PATH"
	@echo ""
	@# Clone CPython if not present; updating a cached clone is best-effort
	@if [ ! -d "$(CPYTHON_TMP)" ]; then \
		echo "Cloning CPython stdlib..."; \
		git clone --depth 1 https://github.com/python/cpython "$(CPYTHON_TMP)"; \
	else \
		echo "Using cached CPython at $(CPYTHON_TMP)"; \
		cd "$(CPYTHON_TMP)" && git pull --ff-only 2>/dev/null || true; \
	fi
	@# Filter out test directories with non-UTF-8 files
	@echo "Filtering stdlib (excluding test/idlelib/turtledemo)..."
	@rm -rf "$(CPYTHON_LIB_CLEAN)"
	@rsync -a --exclude='test' --exclude='idlelib' --exclude='turtledemo' \
		"$(CPYTHON_TMP)/Lib/" "$(CPYTHON_LIB_CLEAN)/"
	@# Extract doctests using alimentar
	@mkdir -p data/corpora
	@# Chained with && so a failed commit lookup aborts instead of tagging the corpus with an empty version
	@CPYTHON_SHA=$$(cd "$(CPYTHON_TMP)" && git rev-parse --short HEAD) && \
		echo "CPython commit: $$CPYTHON_SHA" && \
		alimentar doctest extract "$(CPYTHON_LIB_CLEAN)" \
			-o data/corpora/cpython-doctests.parquet \
			--source cpython \
			--version "$$CPYTHON_SHA"
	@echo ""
	@echo "✅ Extracted to data/corpora/cpython-doctests.parquet"
	@echo "   (This file is gitignored - not committed to repository)"
	@ls -lh data/corpora/cpython-doctests.parquet

# Quality Gates
# Aggregate gate: succeeds only after format, lint and test have all succeeded.
quality: format lint test
	@printf '\n%s\n' "✅ All quality gates passed!"

# Check formatting of examples/ with `ruff format --check`.
format:
	@printf '%s\n' "Checking Python formatting (ruff)..."
	@uv run ruff format --check examples/
	@printf '%s\n' "✅ Formatting check passed"

# Run `ruff format` on examples/ (without --check, unlike the format target).
format-fix:
	@printf '%s\n' "Fixing Python formatting..."
	@uv run ruff format examples/
	@printf '%s\n' "✅ Formatting fixed"

# bashrs rule IDs that are not enforced for scripts/*.sh.
BASHRS_IGNORE := SEC010,DET002,DET003,SC2031,SC2035,SC2046,SC2062,SC2064,SC2086,SC2091,SC2092,SC2117,SC2125,SC2128,SC2140,SC2145,SC2154,SC2161,SC2164,SC2183,SC2201,SC2204,SC2231,SC2266,SC2281,SC2317

# Lint the Python examples with ruff, then every scripts/*.sh with bashrs.
#
# Every script is linted even when an earlier one fails, and the target exits
# non-zero if any of them reported a problem. A bare `for` loop only yields the
# status of its last iteration, which would let a failure in any script but the
# last one go unnoticed, so the result is accumulated explicitly.
lint:
	@echo "Linting Python code (ruff)..."
	@uv run ruff check examples/
	@echo "Linting shell scripts (bashrs)..."
	@status=0; \
	for script in scripts/*.sh; do \
		[ -f "$$script" ] || continue; \
		bashrs lint --ignore $(BASHRS_IGNORE) "$$script" || status=1; \
	done; \
	exit $$status
	@echo "✅ Linting passed"

# Run `ruff check --fix` on examples/.
lint-fix:
	@printf '%s\n' "Auto-fixing Python issues..."
	@uv run ruff check --fix examples/
	@printf '%s\n' "✅ Lint fixes applied"

# Run pytest over examples/ with coverage disabled (--no-cov).
test:
	@printf '%s\n' "Running all Python tests..."
	@uv run pytest examples/ \
		--tb=short --no-cov -n 4 --maxfail=10 --timeout=30 -q
	@printf '%s\n' "✅ All tests passed"

# Run pytest over examples/ with coverage measurement and a term-missing report.
coverage:
	@printf '%s\n' "Running tests with coverage..."
	@uv run pytest examples/ \
		--tb=short --cov=examples --cov-report=term-missing -n 4 --maxfail=10 --timeout=30 -q
	@printf '%s\n' "✅ Coverage report generated"

# Cleanup
# Remove Python tooling leftovers below examples/: cache and report
# directories, plus .coverage data files. Every step is best-effort; find
# errors are silenced and never fail the target.
clean:
	@printf '%s\n' "Cleaning Python build artifacts..."
	@for cache_dir in __pycache__ .pytest_cache htmlcov .ruff_cache; do \
		find examples/ -type d -name "$$cache_dir" -exec rm -rf {} + 2>/dev/null || true; \
	done
	@find examples/ -type f -name ".coverage" -delete 2>/dev/null || true
	@printf '%s\n' "✅ Cleaned Python artifacts"

# Everything `clean` removes, plus generated Rust artifacts below examples/
# and the training_corpus/ export directory.
# Note that this deletes every *.rs file and every directory named `src` or
# `target` under examples/, as well as executable files that are not *.py or *.sh.
clean-all: clean
	@printf '%s\n' "Cleaning generated Rust artifacts..."
	@find examples/ -type f -name "*.rs" -delete 2>/dev/null || true
	@for manifest in Cargo.toml Cargo.lock; do \
		find examples/ -name "$$manifest" -delete 2>/dev/null || true; \
	done
	@for build_dir in target src; do \
		find examples/ -type d -name "$$build_dir" -exec rm -rf {} + 2>/dev/null || true; \
	done
	@find examples/ -type f -executable ! -name "*.py" ! -name "*.sh" -delete 2>/dev/null || true
	@rm -rf training_corpus/
	@printf '%s\n' "✅ Cleaned everything"

# ============================================================================
# Corpus Pipeline (GH-13) - Tarantula/Weak Supervision/Augmentation
# ============================================================================
.PHONY: corpus-label corpus-augment corpus-report corpus-analyze corpus-pipeline

# Input parquet read by corpus-label. `?=` keeps any value already supplied
# through the environment or the make command line.
CORPUS_INPUT ?= data/depyler_citl_corpus_v2.parquet
# Written by corpus-label; read by corpus-augment, corpus-report,
# corpus-analyze and the category-diff targets.
LABELED_CORPUS = data/labeled_corpus.parquet
# Written by corpus-augment.
AUGMENTED_CORPUS = data/augmented_corpus.parquet

# Label the input corpus ($< is $(CORPUS_INPUT)) and write the result to
# $(LABELED_CORPUS).
corpus-label: $(CORPUS_INPUT)
	@printf '%s\n' "Applying weak supervision labels..."
	@uv run python scripts/label_corpus.py $< -o $(LABELED_CORPUS)
	@printf '%s\n' "✅ Labels applied → $(LABELED_CORPUS)"

# Build the augmented corpus from the labeled one ($< is $(LABELED_CORPUS)),
# passing `-m 2` to the augmentation script.
corpus-augment: $(LABELED_CORPUS)
	@printf '%s\n' "Generating augmented corpus..."
	@uv run python scripts/augment_corpus.py $< -o $(AUGMENTED_CORPUS) -m 2
	@printf '%s\n' "✅ Augmented → $(AUGMENTED_CORPUS)"

# Write reports/quality_report.json from the labeled corpus ($<).
corpus-report: $(LABELED_CORPUS)
	@printf '%s\n' "Generating quality report..."
	@mkdir -p reports
	@uv run python scripts/corpus_quality_report.py $< -o reports/quality_report.json
	@printf '%s\n' "✅ Report → reports/quality_report.json"

# Write reports/zero_success_analysis.json from the labeled corpus ($<).
corpus-analyze: $(LABELED_CORPUS)
	@printf '%s\n' "Analyzing zero-success categories..."
	@mkdir -p reports
	@uv run python scripts/zero_success_analyzer.py $< -o reports/zero_success_analysis.json
	@printf '%s\n' "✅ Analysis → reports/zero_success_analysis.json"

# Run label → augment → report → analyze, then print a summary banner and the
# first 20 lines of the markdown quality report.
corpus-pipeline: corpus-label corpus-augment corpus-report corpus-analyze
	@printf '%s\n' \
		"" \
		"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" \
		"✅ Corpus Pipeline Complete!" \
		"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" \
		"Outputs:" \
		"  - $(LABELED_CORPUS)" \
		"  - $(AUGMENTED_CORPUS)" \
		"  - reports/quality_report.json" \
		"  - reports/zero_success_analysis.json" \
		""
	@uv run python scripts/corpus_quality_report.py $(LABELED_CORPUS) --markdown | head -20

# Corpus Diff (GH-14) - Track progress after depyler fixes
.PHONY: corpus-baseline corpus-diff

# Copy the current quality report ($<) to reports/baseline.json.
corpus-baseline: reports/quality_report.json
	@printf '%s\n' "Saving baseline report..."
	@cp $< reports/baseline.json
	@printf '%s\n' "✅ Baseline saved → reports/baseline.json"

# Hand both reports to scripts/corpus_diff.sh; $^ expands to the prerequisites
# in the order listed (baseline first, current report second).
corpus-diff: reports/baseline.json reports/quality_report.json
	@./scripts/corpus_diff.sh $^

# Category Diff (GH-16) - Show which categories changed
.PHONY: corpus-category-diff

# Print a colourised summary of category changes between the baseline corpus
# snapshot and the current labeled corpus.
#
# The output of scripts/category_diff.py is read line by line and split at the
# first ':' into key and value (presumably the script emits `KEY:value` lines -
# TODO confirm against the script). The key selects the action:
#   NOW_PASSING - value is split on ',' and listed one item per line under a
#                 green heading
#   REGRESSED   - same listing under a red heading
#   NET_CHANGE  - printed as "Net: <value> categories"
# Lines with any other key are ignored. An empty value prints "(none)"; the
# empty REGRESSED case uses the green escape code.
corpus-category-diff: data/baseline_corpus.parquet $(LABELED_CORPUS)
	@echo ""
	@echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
	@echo "CATEGORY CHANGES"
	@echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
	@uv run python scripts/category_diff.py data/baseline_corpus.parquet $(LABELED_CORPUS) | \
		while IFS=: read -r key val; do \
			case "$$key" in \
				NOW_PASSING) \
					if [ -n "$$val" ]; then \
						printf "\033[0;32m✅ NOW PASSING:\033[0m\n"; \
						echo "$$val" | tr ',' '\n' | sed 's/^/   - /'; \
					else \
						printf "\033[0;32m✅ NOW PASSING: (none)\033[0m\n"; \
					fi ;; \
				REGRESSED) \
					if [ -n "$$val" ]; then \
						printf "\033[0;31m❌ REGRESSED:\033[0m\n"; \
						echo "$$val" | tr ',' '\n' | sed 's/^/   - /'; \
					else \
						printf "\033[0;32m❌ REGRESSED: (none)\033[0m\n"; \
					fi ;; \
				NET_CHANGE) \
					echo ""; \
					echo "Net: $$val categories" ;; \
			esac; \
		done
	@echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

# Snapshot the labeled corpus as the reference point for corpus-category-diff.
#
# $(LABELED_CORPUS) is an order-only prerequisite (after the `|`): it must
# exist before the copy, but a newer labeled corpus must NOT refresh the
# snapshot. As a normal prerequisite, every re-labeling made the snapshot look
# out of date, so make overwrote it just before the diff ran and the corpus
# was compared with itself. Delete the snapshot file to take a new baseline.
data/baseline_corpus.parquet: | $(LABELED_CORPUS)
	@echo "Saving baseline corpus..."
	@cp $(LABELED_CORPUS) $@
	@echo "✅ Baseline corpus saved → data/baseline_corpus.parquet"

# Retranspile & Refresh (GH-15) - Run latest depyler on corpus
.PHONY: corpus-retranspile corpus-refresh

# Re-run depyler over the corpus via scripts/retranspile_corpus.py.
# The depyler version is obtained with shell command substitution inside the
# quoted string, so its output is treated as data. With $(shell ...) make
# pasted the output into the command text, where a quote or `$` in it would be
# interpreted by the shell.
corpus-retranspile:
	@echo "Retranspiling corpus with depyler $$(depyler --version 2>/dev/null | head -1)..."
	@uv run python scripts/retranspile_corpus.py
	@echo "✅ Retranspile complete"

# Full refresh: baseline → retranspile → pipeline → diff.
# NOTE: the steps are plain prerequisites, so this order is only guaranteed
# when make runs serially (without -j).
corpus-refresh: corpus-baseline corpus-retranspile corpus-pipeline corpus-diff
	@echo ""
	@echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
	@echo "✅ Corpus Refresh Complete!"
	@echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
	@echo "Depyler: $$(depyler --version 2>/dev/null | head -1)"
	@echo "See diff above for improvement metrics."

# ============================================================================
# Rust Verification (GH-17) - Verify transpiled Rust compiles
# ============================================================================
.PHONY: corpus-verify-rust corpus-compile-report

# Script shared by both verification targets.
VERIFY_RUST_SCRIPT := ./scripts/verify_rust_compilation.sh

# Run the verification script with --verbose.
corpus-verify-rust:
	@printf '%s\n' "Verifying transpiled Rust compiles..."
	@$(VERIFY_RUST_SCRIPT) --verbose

# Run the verification script with --json and capture its stdout as
# reports/rust_compile_report.json.
corpus-compile-report:
	@printf '%s\n' "Generating Rust compilation report..."
	@mkdir -p reports
	@$(VERIFY_RUST_SCRIPT) --json > reports/rust_compile_report.json
	@printf '%s\n' "✅ Report → reports/rust_compile_report.json"

# ============================================================================
# Error Analysis (GH-18) - Categorize compilation errors
# ============================================================================
.PHONY: corpus-error-analysis

# Run scripts/analyze_rust_errors.sh with --summary.
corpus-error-analysis:
	@printf '%s\n' "Analyzing Rust compilation errors..."
	@./scripts/analyze_rust_errors.sh --summary

# ============================================================================
# Progress Tracking (GH-19) - Track success rate over time
# ============================================================================
.PHONY: corpus-record-progress corpus-progress-history

# Script behind both progress targets; only the flag differs.
PROGRESS_TRACKER := ./scripts/progress_tracker.sh

# Invoke the tracker with --record.
corpus-record-progress:
	@$(PROGRESS_TRACKER) --record

# Invoke the tracker with --history.
corpus-progress-history:
	@$(PROGRESS_TRACKER) --history

# ============================================================================
# Recommendations (GH-20) - Generate fix recommendations for depyler
# ============================================================================
.PHONY: corpus-recommendations

# Thin wrapper: runs scripts/generate_recommendations.sh without arguments.
corpus-recommendations:
	@./scripts/generate_recommendations.sh

# ============================================================================
# Dashboard (GH-21) - Unified corpus status view
# ============================================================================
.PHONY: corpus-dashboard

# Thin wrapper: runs scripts/corpus_dashboard.sh without arguments.
corpus-dashboard:
	@./scripts/corpus_dashboard.sh

# ============================================================================
# CI Integration (GH-22) - Automated corpus validation
# ============================================================================
.PHONY: corpus-ci corpus-check-test-libs corpus-ci-baseline

# CI gate: run the CI runner script, then the test-lib crate check.
corpus-ci:
	@./scripts/ci_runner.sh
	@uv run python scripts/check_test_lib_crates.py

# Run only the test-lib crate check.
corpus-check-test-libs:
	@uv run python scripts/check_test_lib_crates.py

# Record the current corpus totals as the CI baseline (data/ci_baseline.json).
#
# The JSON object holds total, success, failing and rate (percentage, one
# decimal), computed from the `has_rust` column of $(LABELED_CORPUS).
# - The corpus path comes from $(LABELED_CORPUS) rather than a second
#   hard-coded copy of it.
# - The result goes to a temporary file first and is renamed only on success,
#   so a failing run never leaves an empty or truncated baseline behind.
# - An empty corpus yields rate 0.0 instead of NaN, which is not valid JSON.
corpus-ci-baseline:
	@echo "Saving CI baseline..."
	@./scripts/ci_runner.sh --no-fail > /dev/null
	@uv run python3 -c "import json, pyarrow.parquet as pq; df=pq.read_table('$(LABELED_CORPUS)').to_pandas(); total=len(df); ok=int(df['has_rust'].sum()); print(json.dumps({'total':total,'success':ok,'failing':total-ok,'rate':round(ok*100/total,1) if total else 0.0}))" > data/ci_baseline.json.tmp \
		&& mv -f data/ci_baseline.json.tmp data/ci_baseline.json \
		|| { rm -f data/ci_baseline.json.tmp; exit 1; }
	@echo "✅ Baseline saved → data/ci_baseline.json"

# Single-shot compile rate (Refs depyler#193)
# Measures both [[bin]] and [lib] crates that compile with cargo check
#
# All three targets are commands, not files, so they are declared phony; a
# file with one of these names could otherwise make the target look up to date.
.PHONY: corpus-e2e-rate corpus-e2e-rate-json corpus-e2e-rate-verbose

# Run the measurement script with its default output.
corpus-e2e-rate:
	@uv run python scripts/measure_compile_rate.py

# Same measurement, passing --json to the script.
corpus-e2e-rate-json:
	@uv run python scripts/measure_compile_rate.py --json

# Same measurement, passing --verbose to the script.
corpus-e2e-rate-verbose:
	@uv run python scripts/measure_compile_rate.py --verbose

# ============================================================================
# Golden Traces (GH-23) - Human-verified fix patterns for oracle training
# ============================================================================
# The phony list names exactly the targets defined below. It previously listed
# `corpus-golden-dry-run`, which has no rule (so `make corpus-golden-dry-run`
# silently did nothing), and left out `corpus-golden-json`.
.PHONY: corpus-golden-analyze corpus-golden-export corpus-golden-json

# Run the analyzer with --dry-run.
corpus-golden-analyze:
	@echo "Analyzing compilation errors for golden trace candidates..."
	@uv run python scripts/golden_traces_analyzer.py --dry-run

# Run the analyzer with --per-code 10.
corpus-golden-export:
	@echo "Generating golden traces..."
	@uv run python scripts/golden_traces_analyzer.py --per-code 10
	@echo "✅ Golden traces exported → data/golden_traces.json"

# Run the analyzer with --json.
corpus-golden-json:
	@uv run python scripts/golden_traces_analyzer.py --json

# ============================================================================
# Clippy Gate (GH-24) - Blocking quality gate for idiomatic Rust
# ============================================================================
# corpus-clippy-strict was missing from the phony list although it is a
# command target like its siblings.
.PHONY: corpus-clippy-check corpus-clippy-strict corpus-clippy-report

# Run the clippy gate script with --soft.
corpus-clippy-check:
	@echo "Running clippy on all examples..."
	@uv run python scripts/clippy_gate.py --soft
	@echo "✅ Clippy check complete (soft mode - informational only)"

# Run the clippy gate script with --strict.
corpus-clippy-strict:
	@echo "Running clippy with strict mode (fails on warnings)..."
	@uv run python scripts/clippy_gate.py --strict
	@echo "✅ Clippy strict check passed"

# Run the clippy gate script with --json and capture its stdout as
# reports/clippy_report.json.
corpus-clippy-report:
	@echo "Generating clippy report..."
	@mkdir -p reports
	@uv run python scripts/clippy_gate.py --json > reports/clippy_report.json
	@echo "✅ Report → reports/clippy_report.json"

# ============================================================================
# HITL Review (GH-25) - Human-in-the-loop quality assurance
# ============================================================================
.PHONY: corpus-hitl-sample corpus-hitl-report

# Run the HITL sampler with --sample-pct 5.
corpus-hitl-sample:
	@printf '%s\n' "Generating HITL review sample (5% stratified)..."
	@uv run python scripts/hitl_sampler.py --sample-pct 5
	@printf '%s\n' "✅ Sample generated → data/hitl_reviews/"

# Run the HITL sampler with --report.
corpus-hitl-report:
	@printf '%s\n' "Summarizing HITL review findings..."
	@uv run python scripts/hitl_sampler.py --report