Skip to content

Commit 57a9b48

Browse files
JuliusBrussee, Ben Younes, hireblackout, Bortlesboat, claude
committed
fix(compress): UTF-8 stdout, empty/identical guards, inline-code validation, frontmatter cleanup
Five user-contributed fixes consolidated: * UTF-8 stdout (#289 by @ousamabenyounes) — reconfigure stdout/stderr at the top of cli.py so Windows cp1252 consoles don't crash on the ❌ glyph in error/validation branches and mask the real error. * Empty / unchanged compression guards (#292 by @ousamabenyounes, closes #237) — refuse empty input, refuse empty/whitespace/identical Claude output, read back the backup before touching the input. Five regression tests in tests/test_compress_safety.py. * Inline backtick validation (#309 by @hireblackout) — validate_inline_codes closes the silent-overwrite gap where `npm install` → `yarn install` passed validation. Wired into validate(); 11 unit tests in tests/test_validate_inline.py. * Frontmatter angle-bracket fix (#268 by @Bortlesboat, closes #266) — caveman-compress/SKILL.md description now uses FILEPATH instead of <filepath>, plus verify_repo gains a new verify_skill_frontmatter_upload_compatibility check, UTF-8 hardening for Windows, and the activation-banner regex no longer requires a trailing period. * Two test fixtures (claude-md-project.md, mixed-with-code.md) updated so the new inline-backtick validator passes — they were silently dropping `server/src/`, `type(scope): description`, and `status` references in compression. The fixture is documentation of "good" compression, so the fix is to preserve those references. Co-Authored-By: Ben Younes <ousama.benyounes@oratelecom.net> Co-Authored-By: hireblackout <hireblackout@users.noreply.github.com> Co-Authored-By: Andrew Barnes <Bortlesboat@users.noreply.github.com> Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 84cc3c1 commit 57a9b48

15 files changed

Lines changed: 478 additions & 37 deletions

File tree

caveman-compress/SKILL.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ description: >
44
Compress natural language memory files (CLAUDE.md, todos, preferences) into caveman format
55
to save input tokens. Preserves all technical substance, code, URLs, and structure.
66
Compressed version overwrites the original file. Human-readable backup saved as FILE.original.md.
7-
Trigger: /caveman:compress <filepath> or "compress memory file"
7+
Trigger: /caveman:compress FILEPATH or "compress memory file"
88
---
99

1010
# Caveman Compress

caveman-compress/scripts/cli.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,18 @@
77
"""
88

99
import sys
10+
11+
# Switch stdout/stderr to UTF-8 before anything gets a chance to print.
# Windows consoles default to cp1252, which crashes on the ❌ glyphs emitted by
# the error/validation branches; that crash masks the real error and leaves
# the user with a half-compressed file.
for _stream in (sys.stdout, sys.stderr):
    reconfigure = getattr(_stream, "reconfigure", None)
    if not callable(reconfigure):
        # Stream object offers no reconfigure(); leave it as it is.
        continue
    try:
        # errors="replace" keeps printing alive even for unencodable characters.
        reconfigure(encoding="utf-8", errors="replace")
    except Exception:
        # Best effort only: a stream that refuses reconfiguration must not
        # prevent the CLI from starting.
        pass
21+
1022
from pathlib import Path
1123

1224
from .compress import compress_file

caveman-compress/scripts/compress.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,10 @@ def compress_file(filepath: Path) -> bool:
182182
original_text = filepath.read_text(errors="ignore")
183183
backup_path = filepath.with_name(filepath.stem + ".original.md")
184184

185+
if not original_text.strip():
186+
print("❌ Refusing to compress: file is empty or whitespace-only.")
187+
return False
188+
185189
# Check if backup already exists to prevent accidental overwriting
186190
if backup_path.exists():
187191
print(f"⚠️ Backup file already exists: {backup_path}")
@@ -193,8 +197,31 @@ def compress_file(filepath: Path) -> bool:
193197
print("Compressing with Claude...")
194198
compressed = call_claude(build_compress_prompt(original_text))
195199

196-
# Save original as backup, write compressed to original path
200+
if compressed is None or not compressed.strip():
201+
print("❌ Compression aborted: Claude returned an empty response.")
202+
print(" Original file is untouched (no backup created).")
203+
return False
204+
205+
if compressed.strip() == original_text.strip():
206+
print("❌ Compression aborted: output is identical to input.")
207+
print(" Likely causes: Claude refused, returned the prompt verbatim, or the file is")
208+
print(" already in caveman form. Original file is untouched (no backup created).")
209+
return False
210+
211+
# Save original as backup, then verify the backup readback before
212+
# touching the input file. If the filesystem dropped bytes (encoding,
213+
# antivirus, disk full), unlink the bad backup and abort instead of
214+
# leaving the user with a corrupt backup + compressed primary.
197215
backup_path.write_text(original_text)
216+
backup_readback = backup_path.read_text(errors="ignore")
217+
if backup_readback != original_text:
218+
print(f"❌ Backup write verification failed: {backup_path}")
219+
print(" In-memory original differs from on-disk backup. Aborting before touching the input file.")
220+
try:
221+
backup_path.unlink()
222+
except OSError:
223+
pass
224+
return False
198225
filepath.write_text(compressed)
199226

200227
# Step 2: Validate + Retry

caveman-compress/scripts/validate.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#!/usr/bin/env python3
22
import re
3+
from collections import Counter
34
from pathlib import Path
45

56
URL_REGEX = re.compile(r"https?://[^\s)]+")
@@ -93,6 +94,12 @@ def count_bullets(text):
9394
return len(BULLET_REGEX.findall(text))
9495

9596

97+
# Fenced code blocks: an opening run of 3+ fence characters at the start of a
# line, closed by a line that starts with a run at least as long. Capturing the
# whole run (instead of exactly three characters) prevents a 4+ character fence
# from leaving a stray backtick behind that would pair up with later inline code.
_BACKTICK_FENCE_REGEX = re.compile(r"^(`{3,})[\s\S]*?^\1`*", re.MULTILINE)
_TILDE_FENCE_REGEX = re.compile(r"^(~{3,})[\s\S]*?^\1~*", re.MULTILINE)
# Inline code span: text between two single backticks, captured without them.
_INLINE_CODE_REGEX = re.compile(r"`([^`]+)`")


def extract_inline_codes(text):
    """Return the contents of every inline code span in *text*.

    Fenced code blocks (backtick fences first, then tilde fences) are removed
    before searching, so backticks inside them are not reported.

    Args:
        text: Markdown source to scan.

    Returns:
        A list of the strings found between single backticks, in order of
        appearance and including duplicates.
    """
    without_fences = _BACKTICK_FENCE_REGEX.sub("", text)
    without_fences = _TILDE_FENCE_REGEX.sub("", without_fences)
    return _INLINE_CODE_REGEX.findall(without_fences)
101+
102+
96103
# ---------- Validators ----------
97104

98105

@@ -144,6 +151,22 @@ def validate_bullets(orig, comp, result):
144151
result.add_warning(f"Bullet count changed too much: {b1} -> {b2}")
145152

146153

154+
def _format_code_set(items):
    """Render *items* like a set literal, but in sorted (deterministic) order."""
    return "{" + ", ".join(repr(item) for item in sorted(items)) + "}"


def validate_inline_codes(orig, comp, result):
    """Check that inline code spans of the original survive compression.

    Args:
        orig: Original text.
        comp: Compressed text.
        result: Collector exposing ``add_error(msg)`` and ``add_warning(msg)``.

    An error is recorded when a span is missing from *comp* or occurs fewer
    times than in *orig*. A warning is recorded for spans that appear only in
    *comp*. Spans present in both whose count merely increased are not
    reported.
    """
    c1 = Counter(extract_inline_codes(orig))
    c2 = Counter(extract_inline_codes(comp))

    if c1 == c2:
        return

    # Spans that vanished entirely, plus a note for each partially lost span.
    lost = {code for code in c1 if code not in c2}
    for code, count in c1.items():
        if code in c2 and c2[code] < count:
            lost.add(f"{code} (lost {count - c2[code]} of {count} occurrences)")
    added = {code for code in c2 if code not in c1}

    # Sets have no stable iteration order across runs (string hash
    # randomization), so format them sorted to keep messages reproducible.
    if lost:
        result.add_error(f"Inline code lost: {_format_code_set(lost)}")
    if added:
        result.add_warning(f"Inline code added: {_format_code_set(added)}")
168+
169+
147170
# ---------- Main ----------
148171

149172

@@ -158,6 +181,7 @@ def validate(original_path: Path, compressed_path: Path) -> ValidationResult:
158181
validate_urls(orig, comp, result)
159182
validate_paths(orig, comp, result)
160183
validate_bullets(orig, comp, result)
184+
validate_inline_codes(orig, comp, result)
161185

162186
return result
163187

plugins/caveman/skills/compress/scripts/cli.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,18 @@
77
"""
88

99
import sys
10+
11+
# Switch stdout/stderr to UTF-8 before anything gets a chance to print.
# Windows consoles default to cp1252, which crashes on the ❌ glyphs emitted by
# the error/validation branches; that crash masks the real error and leaves
# the user with a half-compressed file.
for _stream in (sys.stdout, sys.stderr):
    reconfigure = getattr(_stream, "reconfigure", None)
    if not callable(reconfigure):
        # Stream object offers no reconfigure(); leave it as it is.
        continue
    try:
        # errors="replace" keeps printing alive even for unencodable characters.
        reconfigure(encoding="utf-8", errors="replace")
    except Exception:
        # Best effort only: a stream that refuses reconfiguration must not
        # prevent the CLI from starting.
        pass
21+
1022
from pathlib import Path
1123

1224
from .compress import compress_file

plugins/caveman/skills/compress/scripts/compress.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,10 @@ def compress_file(filepath: Path) -> bool:
182182
original_text = filepath.read_text(errors="ignore")
183183
backup_path = filepath.with_name(filepath.stem + ".original.md")
184184

185+
if not original_text.strip():
186+
print("❌ Refusing to compress: file is empty or whitespace-only.")
187+
return False
188+
185189
# Check if backup already exists to prevent accidental overwriting
186190
if backup_path.exists():
187191
print(f"⚠️ Backup file already exists: {backup_path}")
@@ -193,8 +197,31 @@ def compress_file(filepath: Path) -> bool:
193197
print("Compressing with Claude...")
194198
compressed = call_claude(build_compress_prompt(original_text))
195199

196-
# Save original as backup, write compressed to original path
200+
if compressed is None or not compressed.strip():
201+
print("❌ Compression aborted: Claude returned an empty response.")
202+
print(" Original file is untouched (no backup created).")
203+
return False
204+
205+
if compressed.strip() == original_text.strip():
206+
print("❌ Compression aborted: output is identical to input.")
207+
print(" Likely causes: Claude refused, returned the prompt verbatim, or the file is")
208+
print(" already in caveman form. Original file is untouched (no backup created).")
209+
return False
210+
211+
# Save original as backup, then verify the backup readback before
212+
# touching the input file. If the filesystem dropped bytes (encoding,
213+
# antivirus, disk full), unlink the bad backup and abort instead of
214+
# leaving the user with a corrupt backup + compressed primary.
197215
backup_path.write_text(original_text)
216+
backup_readback = backup_path.read_text(errors="ignore")
217+
if backup_readback != original_text:
218+
print(f"❌ Backup write verification failed: {backup_path}")
219+
print(" In-memory original differs from on-disk backup. Aborting before touching the input file.")
220+
try:
221+
backup_path.unlink()
222+
except OSError:
223+
pass
224+
return False
198225
filepath.write_text(compressed)
199226

200227
# Step 2: Validate + Retry

plugins/caveman/skills/compress/scripts/validate.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#!/usr/bin/env python3
22
import re
3+
from collections import Counter
34
from pathlib import Path
45

56
URL_REGEX = re.compile(r"https?://[^\s)]+")
@@ -93,6 +94,12 @@ def count_bullets(text):
9394
return len(BULLET_REGEX.findall(text))
9495

9596

97+
# Fenced code blocks: an opening run of 3+ fence characters at the start of a
# line, closed by a line that starts with a run at least as long. Capturing the
# whole run (instead of exactly three characters) prevents a 4+ character fence
# from leaving a stray backtick behind that would pair up with later inline code.
_BACKTICK_FENCE_REGEX = re.compile(r"^(`{3,})[\s\S]*?^\1`*", re.MULTILINE)
_TILDE_FENCE_REGEX = re.compile(r"^(~{3,})[\s\S]*?^\1~*", re.MULTILINE)
# Inline code span: text between two single backticks, captured without them.
_INLINE_CODE_REGEX = re.compile(r"`([^`]+)`")


def extract_inline_codes(text):
    """Return the contents of every inline code span in *text*.

    Fenced code blocks (backtick fences first, then tilde fences) are removed
    before searching, so backticks inside them are not reported.

    Args:
        text: Markdown source to scan.

    Returns:
        A list of the strings found between single backticks, in order of
        appearance and including duplicates.
    """
    without_fences = _BACKTICK_FENCE_REGEX.sub("", text)
    without_fences = _TILDE_FENCE_REGEX.sub("", without_fences)
    return _INLINE_CODE_REGEX.findall(without_fences)
101+
102+
96103
# ---------- Validators ----------
97104

98105

@@ -144,6 +151,22 @@ def validate_bullets(orig, comp, result):
144151
result.add_warning(f"Bullet count changed too much: {b1} -> {b2}")
145152

146153

154+
def _format_code_set(items):
    """Render *items* like a set literal, but in sorted (deterministic) order."""
    return "{" + ", ".join(repr(item) for item in sorted(items)) + "}"


def validate_inline_codes(orig, comp, result):
    """Check that inline code spans of the original survive compression.

    Args:
        orig: Original text.
        comp: Compressed text.
        result: Collector exposing ``add_error(msg)`` and ``add_warning(msg)``.

    An error is recorded when a span is missing from *comp* or occurs fewer
    times than in *orig*. A warning is recorded for spans that appear only in
    *comp*. Spans present in both whose count merely increased are not
    reported.
    """
    c1 = Counter(extract_inline_codes(orig))
    c2 = Counter(extract_inline_codes(comp))

    if c1 == c2:
        return

    # Spans that vanished entirely, plus a note for each partially lost span.
    lost = {code for code in c1 if code not in c2}
    for code, count in c1.items():
        if code in c2 and c2[code] < count:
            lost.add(f"{code} (lost {count - c2[code]} of {count} occurrences)")
    added = {code for code in c2 if code not in c1}

    # Sets have no stable iteration order across runs (string hash
    # randomization), so format them sorted to keep messages reproducible.
    if lost:
        result.add_error(f"Inline code lost: {_format_code_set(lost)}")
    if added:
        result.add_warning(f"Inline code added: {_format_code_set(added)}")
168+
169+
147170
# ---------- Main ----------
148171

149172

@@ -158,6 +181,7 @@ def validate(original_path: Path, compressed_path: Path) -> ValidationResult:
158181
validate_urls(orig, comp, result)
159182
validate_paths(orig, comp, result)
160183
validate_bullets(orig, comp, result)
184+
validate_inline_codes(orig, comp, result)
161185

162186
return result
163187

skills/compress/scripts/cli.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,18 @@
77
"""
88

99
import sys
10+
11+
# Switch stdout/stderr to UTF-8 before anything gets a chance to print.
# Windows consoles default to cp1252, which crashes on the ❌ glyphs emitted by
# the error/validation branches; that crash masks the real error and leaves
# the user with a half-compressed file.
for _stream in (sys.stdout, sys.stderr):
    reconfigure = getattr(_stream, "reconfigure", None)
    if not callable(reconfigure):
        # Stream object offers no reconfigure(); leave it as it is.
        continue
    try:
        # errors="replace" keeps printing alive even for unencodable characters.
        reconfigure(encoding="utf-8", errors="replace")
    except Exception:
        # Best effort only: a stream that refuses reconfiguration must not
        # prevent the CLI from starting.
        pass
21+
1022
from pathlib import Path
1123

1224
from .compress import compress_file

skills/compress/scripts/compress.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,10 @@ def compress_file(filepath: Path) -> bool:
182182
original_text = filepath.read_text(errors="ignore")
183183
backup_path = filepath.with_name(filepath.stem + ".original.md")
184184

185+
if not original_text.strip():
186+
print("❌ Refusing to compress: file is empty or whitespace-only.")
187+
return False
188+
185189
# Check if backup already exists to prevent accidental overwriting
186190
if backup_path.exists():
187191
print(f"⚠️ Backup file already exists: {backup_path}")
@@ -193,8 +197,31 @@ def compress_file(filepath: Path) -> bool:
193197
print("Compressing with Claude...")
194198
compressed = call_claude(build_compress_prompt(original_text))
195199

196-
# Save original as backup, write compressed to original path
200+
if compressed is None or not compressed.strip():
201+
print("❌ Compression aborted: Claude returned an empty response.")
202+
print(" Original file is untouched (no backup created).")
203+
return False
204+
205+
if compressed.strip() == original_text.strip():
206+
print("❌ Compression aborted: output is identical to input.")
207+
print(" Likely causes: Claude refused, returned the prompt verbatim, or the file is")
208+
print(" already in caveman form. Original file is untouched (no backup created).")
209+
return False
210+
211+
# Save original as backup, then verify the backup readback before
212+
# touching the input file. If the filesystem dropped bytes (encoding,
213+
# antivirus, disk full), unlink the bad backup and abort instead of
214+
# leaving the user with a corrupt backup + compressed primary.
197215
backup_path.write_text(original_text)
216+
backup_readback = backup_path.read_text(errors="ignore")
217+
if backup_readback != original_text:
218+
print(f"❌ Backup write verification failed: {backup_path}")
219+
print(" In-memory original differs from on-disk backup. Aborting before touching the input file.")
220+
try:
221+
backup_path.unlink()
222+
except OSError:
223+
pass
224+
return False
198225
filepath.write_text(compressed)
199226

200227
# Step 2: Validate + Retry

skills/compress/scripts/validate.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#!/usr/bin/env python3
22
import re
3+
from collections import Counter
34
from pathlib import Path
45

56
URL_REGEX = re.compile(r"https?://[^\s)]+")
@@ -93,6 +94,12 @@ def count_bullets(text):
9394
return len(BULLET_REGEX.findall(text))
9495

9596

97+
# Fenced code blocks: an opening run of 3+ fence characters at the start of a
# line, closed by a line that starts with a run at least as long. Capturing the
# whole run (instead of exactly three characters) prevents a 4+ character fence
# from leaving a stray backtick behind that would pair up with later inline code.
_BACKTICK_FENCE_REGEX = re.compile(r"^(`{3,})[\s\S]*?^\1`*", re.MULTILINE)
_TILDE_FENCE_REGEX = re.compile(r"^(~{3,})[\s\S]*?^\1~*", re.MULTILINE)
# Inline code span: text between two single backticks, captured without them.
_INLINE_CODE_REGEX = re.compile(r"`([^`]+)`")


def extract_inline_codes(text):
    """Return the contents of every inline code span in *text*.

    Fenced code blocks (backtick fences first, then tilde fences) are removed
    before searching, so backticks inside them are not reported.

    Args:
        text: Markdown source to scan.

    Returns:
        A list of the strings found between single backticks, in order of
        appearance and including duplicates.
    """
    without_fences = _BACKTICK_FENCE_REGEX.sub("", text)
    without_fences = _TILDE_FENCE_REGEX.sub("", without_fences)
    return _INLINE_CODE_REGEX.findall(without_fences)
101+
102+
96103
# ---------- Validators ----------
97104

98105

@@ -144,6 +151,22 @@ def validate_bullets(orig, comp, result):
144151
result.add_warning(f"Bullet count changed too much: {b1} -> {b2}")
145152

146153

154+
def _format_code_set(items):
    """Render *items* like a set literal, but in sorted (deterministic) order."""
    return "{" + ", ".join(repr(item) for item in sorted(items)) + "}"


def validate_inline_codes(orig, comp, result):
    """Check that inline code spans of the original survive compression.

    Args:
        orig: Original text.
        comp: Compressed text.
        result: Collector exposing ``add_error(msg)`` and ``add_warning(msg)``.

    An error is recorded when a span is missing from *comp* or occurs fewer
    times than in *orig*. A warning is recorded for spans that appear only in
    *comp*. Spans present in both whose count merely increased are not
    reported.
    """
    c1 = Counter(extract_inline_codes(orig))
    c2 = Counter(extract_inline_codes(comp))

    if c1 == c2:
        return

    # Spans that vanished entirely, plus a note for each partially lost span.
    lost = {code for code in c1 if code not in c2}
    for code, count in c1.items():
        if code in c2 and c2[code] < count:
            lost.add(f"{code} (lost {count - c2[code]} of {count} occurrences)")
    added = {code for code in c2 if code not in c1}

    # Sets have no stable iteration order across runs (string hash
    # randomization), so format them sorted to keep messages reproducible.
    if lost:
        result.add_error(f"Inline code lost: {_format_code_set(lost)}")
    if added:
        result.add_warning(f"Inline code added: {_format_code_set(added)}")
168+
169+
147170
# ---------- Main ----------
148171

149172

@@ -158,6 +181,7 @@ def validate(original_path: Path, compressed_path: Path) -> ValidationResult:
158181
validate_urls(orig, comp, result)
159182
validate_paths(orig, comp, result)
160183
validate_bullets(orig, comp, result)
184+
validate_inline_codes(orig, comp, result)
161185

162186
return result
163187

0 commit comments

Comments
 (0)