From e923ca48798e797d284185254101e3911cad5fb8 Mon Sep 17 00:00:00 2001
From: Michael Ding <yandy.ding@gmail.com>
Date: Thu, 28 May 2026 12:20:58 +0800
Subject: [PATCH 1/5] feat: add hash-based image naming with git-style
 directory structure

---
 src/kbmate_cli/image_helper.py | 41 +++++++++++++++----
 tests/test_image_helper.py     | 72 +++++++++++++++++++++++++++++++++-
 2 files changed, 104 insertions(+), 9 deletions(-)

diff --git a/src/kbmate_cli/image_helper.py b/src/kbmate_cli/image_helper.py
index d4d808d..a1078a4 100644
--- a/src/kbmate_cli/image_helper.py
+++ b/src/kbmate_cli/image_helper.py
@@ -1,3 +1,4 @@
+import hashlib
 import re
 import shutil
 from pathlib import Path
@@ -10,7 +11,14 @@ def normalize_image_refs(markdown: str) -> str:
     return result
 
 
-def extract_and_relink_images(markdown: str, src_dir: str, dst_dir: str) -> str:
+def extract_and_relink_images(
+    markdown: str,
+    src_dir: str,
+    dst_dir: str,
+    *,
+    sequential: bool = False,
+    ref_subdir: str = "",
+) -> str:
     src_path = Path(src_dir)
     dst_path = Path(dst_dir)
     dst_path.mkdir(parents=True, exist_ok=True)
@@ -24,12 +32,29 @@ def extract_and_relink_images(markdown: str, src_dir: str, dst_dir: str) -> str:
         filename = Path(img_path).name
         src_file = next(src_path.rglob(filename), None)
         if src_file is not None and src_file.is_file():
-            new_name = f"image-{counter:03d}{src_file.suffix}"
-            dst_file = dst_path / new_name
-            shutil.move(str(src_file), str(dst_file))
-            old_ref = match.group(0)
-            new_ref = f"![](assets/{dst_path.name}/{new_name})"
-            result = result.replace(old_ref, new_ref, 1)
-            counter += 1
+            if sequential:
+                new_name = f"image-{counter:03d}{src_file.suffix}"
+                dst_file = dst_path / new_name
+                shutil.move(str(src_file), str(dst_file))
+                old_ref = match.group(0)
+                new_ref = f"![](assets/{dst_path.name}/{new_name})"
+                result = result.replace(old_ref, new_ref, 1)
+                counter += 1
+            else:
+                content = src_file.read_bytes()
+                hash_hex = hashlib.sha256(content).hexdigest()
+                hash_prefix = hash_hex[:2]
+                hash_sub = hash_hex[2:4]
+                hash_dir = dst_path / hash_prefix / hash_sub
+                hash_dir.mkdir(parents=True, exist_ok=True)
+                dst_file = hash_dir / f"{hash_hex}{src_file.suffix}"
+                if not dst_file.exists():
+                    shutil.move(str(src_file), str(dst_file))
+                else:
+                    src_file.unlink()
+                old_ref = match.group(0)
+                ref_parts = [p for p in ("assets", ref_subdir, hash_prefix, hash_sub, f"{hash_hex}{src_file.suffix}") if p]
+                new_ref = f"![]({'/'.join(ref_parts)})"
+                result = result.replace(old_ref, new_ref, 1)
 
     return result
diff --git a/tests/test_image_helper.py b/tests/test_image_helper.py
index 840918d..0df0f3a 100644
--- a/tests/test_image_helper.py
+++ b/tests/test_image_helper.py
@@ -1,3 +1,5 @@
+import hashlib
+
 import pytest
 from pathlib import Path
 from kbmate_cli.image_helper import normalize_image_refs, extract_and_relink_images
@@ -29,10 +31,78 @@ def test_extract_and_relink_images():
     (src_dir / "doc-0001-01.png").touch()
     (src_dir / "doc-0001-02.png").touch()
 
-    result = extract_and_relink_images(md_content, str(src_dir), str(dst_dir))
+    result = extract_and_relink_images(md_content, str(src_dir), str(dst_dir), sequential=True)
 
     assert (dst_dir / "image-001.png").exists()
     assert (dst_dir / "image-002.png").exists()
     assert "![](assets/test_relink_dst/image-001.png)" in result
     assert "![](assets/test_relink_dst/image-002.png)" in result
     assert "doc-0001-01" not in result
+
+
+def test_extract_and_relink_images_hash_naming():
+    content1 = b"hello world image 1"
+    content2 = b"hello world image 2"
+    md_content = (
+        "![](/tmp/src/doc-0001-01.png)\n"
+        "![](/tmp/src/doc-0001-02.png)"
+    )
+    src_dir = Path("/tmp/test_hash_src")
+    dst_dir = Path("/tmp/test_hash_dst")
+    src_dir.mkdir(parents=True, exist_ok=True)
+    dst_dir.mkdir(parents=True, exist_ok=True)
+    (src_dir / "doc-0001-01.png").write_bytes(content1)
+    (src_dir / "doc-0001-02.png").write_bytes(content2)
+
+    hash1 = hashlib.sha256(content1).hexdigest()
+    hash2 = hashlib.sha256(content2).hexdigest()
+
+    result = extract_and_relink_images(md_content, str(src_dir), str(dst_dir), sequential=False)
+
+    assert (dst_dir / hash1[:2] / hash1[2:4] / f"{hash1}.png").exists()
+    assert (dst_dir / hash2[:2] / hash2[2:4] / f"{hash2}.png").exists()
+    assert f"assets/{hash1[:2]}/{hash1[2:4]}/{hash1}.png" in result
+    assert f"assets/{hash2[:2]}/{hash2[2:4]}/{hash2}.png" in result
+    assert "image-001" not in result
+
+
+def test_extract_and_relink_images_dedup():
+    content = b"duplicate content"
+    md_content = (
+        "![](/tmp/src_dedup/img-a.png)\n"
+        "![](/tmp/src_dedup/img-b.png)"
+    )
+    src_dir = Path("/tmp/test_dedup_src")
+    dst_dir = Path("/tmp/test_dedup_dst")
+    src_dir.mkdir(parents=True, exist_ok=True)
+    dst_dir.mkdir(parents=True, exist_ok=True)
+    (src_dir / "img-a.png").write_bytes(content)
+    (src_dir / "img-b.png").write_bytes(content)
+
+    hash_hex = hashlib.sha256(content).hexdigest()
+
+    result = extract_and_relink_images(md_content, str(src_dir), str(dst_dir), sequential=False)
+
+    hash_file = dst_dir / hash_hex[:2] / hash_hex[2:4] / f"{hash_hex}.png"
+    assert hash_file.exists()
+    expected_ref = f"assets/{hash_hex[:2]}/{hash_hex[2:4]}/{hash_hex}.png"
+    assert result.count(expected_ref) == 2
+
+
+def test_extract_and_relink_images_hash_ref_subdir():
+    content = b"some content"
+    md_content = "![](/tmp/src_ref/img.png)"
+    src_dir = Path("/tmp/test_ref_src")
+    dst_dir = Path("/tmp/test_ref_dst")
+    src_dir.mkdir(parents=True, exist_ok=True)
+    dst_dir.mkdir(parents=True, exist_ok=True)
+    (src_dir / "img.png").write_bytes(content)
+
+    hash_hex = hashlib.sha256(content).hexdigest()
+    result = extract_and_relink_images(
+        md_content, str(src_dir), str(dst_dir),
+        sequential=False, ref_subdir="subdir",
+    )
+
+    expected = f"assets/subdir/{hash_hex[:2]}/{hash_hex[2:4]}/{hash_hex}.png"
+    assert expected in result

From 0849eacebbe00ee46b6caca93487477d09f750b7 Mon Sep 17 00:00:00 2001
From: Michael Ding <yandy.ding@gmail.com>
Date: Thu, 28 May 2026 12:26:37 +0800
Subject: [PATCH 2/5] feat: add --assets-seqname flag, default to hash-based
 image naming

---
 src/kbmate_cli/main.py | 30 ++++++++++++++++++++----------
 tests/test_cli.py      | 36 ++++++++++++++++++++++++++++++++----
 2 files changed, 52 insertions(+), 14 deletions(-)

diff --git a/src/kbmate_cli/main.py b/src/kbmate_cli/main.py
index 6d74451..bb46b73 100644
--- a/src/kbmate_cli/main.py
+++ b/src/kbmate_cli/main.py
@@ -48,22 +48,22 @@ def _collect_files_from_list(filelist: Path) -> list[str]:
     return [line.strip() for line in lines if line.strip()]
 
 
-def _convert_pdf(src: Path, assets_dir: Path) -> str:
+def _convert_pdf(src: Path, src_dir: Path, dst_dir: Path, *, sequential: bool = False, ref_subdir: str = "") -> str:
     from kbmate_cli.pdf_converter import convert_pdf
     from kbmate_cli.image_helper import extract_and_relink_images
 
-    md = convert_pdf(str(src), str(assets_dir))
-    return extract_and_relink_images(md, str(assets_dir), str(assets_dir))
+    md = convert_pdf(str(src), str(src_dir))
+    return extract_and_relink_images(md, str(src_dir), str(dst_dir), sequential=sequential, ref_subdir=ref_subdir)
 
 
-def _convert_docx(src: Path, assets_dir: Path) -> str:
+def _convert_docx(src: Path, src_dir: Path, dst_dir: Path, *, sequential: bool = False, ref_subdir: str = "") -> str:
     from kbmate_cli.docx_converter import convert_docx
     from kbmate_cli.image_helper import normalize_image_refs, extract_and_relink_images
 
-    pandoc_output = assets_dir / "pandoc_output"
+    pandoc_output = src_dir / "pandoc_output"
     md = convert_docx(str(src), str(pandoc_output))
     md = normalize_image_refs(md)
-    md = extract_and_relink_images(md, str(pandoc_output), str(assets_dir))
+    md = extract_and_relink_images(md, str(pandoc_output), str(dst_dir), sequential=sequential, ref_subdir=ref_subdir)
     if pandoc_output.exists():
         import shutil
         shutil.rmtree(pandoc_output)
@@ -82,6 +82,7 @@ def convert_single(
     *,
     layout: Literal["flat", "mirror"] = "flat",
     relative_to: Path | None = None,
+    assets_seqname: bool = False,
 ) -> None:
     ext = source_path.suffix.lower()
     converter = _CONVERTERS.get(ext)
@@ -107,7 +108,14 @@ def convert_single(
     converts_dir.mkdir(parents=True, exist_ok=True)
     assets_dir.mkdir(parents=True, exist_ok=True)
 
-    markdown_content = converter(source_path, assets_dir)
+    if assets_seqname:
+        markdown_content = converter(source_path, assets_dir, assets_dir, sequential=True)
+    else:
+        hash_base = assets_parent / rel
+        hash_base.mkdir(parents=True, exist_ok=True)
+        ref_subdir = "" if rel == Path(".") else str(rel)
+        markdown_content = converter(source_path, assets_dir, hash_base, sequential=False, ref_subdir=ref_subdir)
+
     md_path = converts_dir / f"{safe_stem}.md"
     md_path.write_text(markdown_content, encoding="utf-8")
     typer.echo(f"Converted: {source_path} -> {md_path}")
@@ -117,6 +125,7 @@ def convert_single(
 def convert(
     source_file: str = typer.Argument(..., help="Path or URL to the .docx or .pdf file"),
     output_dir: str = typer.Option("raw", help="Output directory"),
+    assets_seqname: bool = typer.Option(False, "--assets-seqname", help="Use sequential naming for asset images instead of content hash"),
 ):
     try:
         source_file, temp_path = _resolve_source(source_file)
@@ -130,7 +139,7 @@ def convert(
         raise typer.Exit(code=1)
 
     try:
-        convert_single(src, Path(output_dir))
+        convert_single(src, Path(output_dir), assets_seqname=assets_seqname)
     except ValueError as e:
         typer.echo(f"Error: {e}", err=True)
         raise typer.Exit(code=1)
@@ -145,6 +154,7 @@ def bulk_convert(
     file_list: str = typer.Option(None, "-f", "--file-list", help="File containing one source per line"),
     output_dir: str = typer.Option("raw", help="Output directory"),
     output_layout: str = typer.Option("flat", "--output-layout", help="Output layout: flat or mirror"),
+    assets_seqname: bool = typer.Option(False, "--assets-seqname", help="Use sequential naming for asset images instead of content hash"),
 ):
     if (recursive is not None) == (file_list is not None):
         typer.echo("Error: specify either -r (directory) or -f (file list), not both", err=True)
@@ -163,7 +173,7 @@ def bulk_convert(
             raise typer.Exit(code=1)
         for src_path in _collect_files_from_dir(root):
             try:
-                convert_single(src_path, out, layout=output_layout, relative_to=root)
+                convert_single(src_path, out, layout=output_layout, relative_to=root, assets_seqname=assets_seqname)
             except Exception as e:
                 typer.echo(f"Error converting {src_path}: {e}", err=True)
     else:
@@ -192,7 +202,7 @@ def bulk_convert(
                     print_cleanup_hint(temp_path)
                 continue
             try:
-                convert_single(src, out, layout=output_layout)
+                convert_single(src, out, layout=output_layout, assets_seqname=assets_seqname)
             except Exception as e:
                 typer.echo(f"Error converting {src}: {e}", err=True)
             finally:
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 89c2b6b..8d753f0 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -36,15 +36,43 @@ def test_convert_lunwen_pdf():
     assert result.exit_code == 0
 
 
+def test_convert_assets_seqname_flag_in_help():
+    result = runner.invoke(app, ["convert", "--help"])
+    assert result.exit_code == 0
+    assert "--assets-seqname" in result.stdout
+
+
+def test_bulk_convert_assets_seqname_flag_in_help():
+    result = runner.invoke(app, ["bulk-convert", "--help"])
+    assert result.exit_code == 0
+    assert "--assets-seqname" in result.stdout
+
+
+@patch.dict("kbmate_cli.main._CONVERTERS", {".pdf": MagicMock(return_value="# mock")})
+def test_bulk_convert_assets_seqname_flag():
+    with tempfile.TemporaryDirectory() as tmp:
+        root = Path(tmp)
+        pdf = root / "a.pdf"
+        pdf.write_text("fake")
+        out = root / "out"
+        result = runner.invoke(app, [
+            "bulk-convert", "-r", str(root),
+            "--output-dir", str(out),
+            "--assets-seqname",
+        ])
+        assert result.exit_code == 0
+        assets_dir = out / "assets" / "a"
+        assert assets_dir.exists()
+
+
 def test_convert_pdf_with_spaces_in_filename():
     src = FIXTURE_DIR / "eigent README CN.pdf"
     out = Path("/tmp/test_cli_spaces_output")
     result = runner.invoke(app, ["convert", str(src), "--output-dir", str(out)])
     assert result.exit_code == 0, f"Failed with output: {result.output}"
-    assets_dir = out / "assets" / "eigent_README_CN"
-    assert assets_dir.exists()
-    images = list(assets_dir.glob("*"))
-    assert len(images) > 0, f"No images found in {assets_dir}"
+    assert (out / "assets").exists()
+    hash_dirs = list((out / "assets").glob("*"))
+    assert len(hash_dirs) > 0, f"No hash dirs found in {out / 'assets'}"
 
 
 def test_convert_file_url():

From 0e71ada7c3d200eb927afa5cf1ba003667e680e0 Mon Sep 17 00:00:00 2001
From: Michael Ding <yandy.ding@gmail.com>
Date: Thu, 28 May 2026 13:16:15 +0800
Subject: [PATCH 3/5] docs: add design spec and implementation plan for asset
 hash naming

---
 .../plans/2026-05-28-assets-hash-naming.md    | 369 ++++++++++++++++++
 .../2026-05-28-assets-hash-naming-design.md   | 144 +++++++
 2 files changed, 513 insertions(+)
 create mode 100644 docs/superpowers/plans/2026-05-28-assets-hash-naming.md
 create mode 100644 docs/superpowers/specs/2026-05-28-assets-hash-naming-design.md

diff --git a/docs/superpowers/plans/2026-05-28-assets-hash-naming.md b/docs/superpowers/plans/2026-05-28-assets-hash-naming.md
new file mode 100644
index 0000000..657f212
--- /dev/null
+++ b/docs/superpowers/plans/2026-05-28-assets-hash-naming.md
@@ -0,0 +1,369 @@
+# Assets Hash Naming Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Add `--assets-seqname` flag to `convert`/`bulk-convert`; default image naming uses SHA256 content hash with git-style two-level directory.
+
+**Architecture:** `extract_and_relink_images()` in `image_helper.py` receives `sequential` and `ref_subdir` parameters. Hash mode computes SHA256, stores under `dst_dir/{hash[:2]}/{hash[2:4]}/{hash64}.ext`, deduplicates. Call chain: CLI → `convert_single` → `_convert_pdf`/`_convert_docx` → `extract_and_relink_images`.
+
+**Tech Stack:** Python 3.12+, `hashlib` (stdlib), `shutil` (stdlib), `typer`
+
+---
+
+## File Structure
+
+| File | Responsibility | Change |
+|------|---------------|--------|
+| `src/kbmate_cli/image_helper.py` | Hash computation, file move, ref rewrite | Add `sequential`/`ref_subdir` params, hash naming path |
+| `src/kbmate_cli/main.py` | CLI commands, orchestration | Add `--assets-seqname` option, propagate through call chain |
+| `tests/test_image_helper.py` | Test hash naming logic | Add hash naming + dedup + ref_subdir tests |
+| `tests/test_cli.py` | CLI integration tests | Add `--assets-seqname` integration test |
+
+---
+
+### Task 1: Update `extract_and_relink_images` with hash naming
+
+**Files:**
+- Modify: `src/kbmate_cli/image_helper.py`
+- Test: `tests/test_image_helper.py`
+
+- [ ] **Step 1: Write failing test for hash naming**
+
+```python
+import hashlib
+from kbmate_cli.image_helper import extract_and_relink_images
+
+def test_extract_and_relink_images_hash_naming():
+    content1 = b"hello world image 1"
+    content2 = b"hello world image 2"
+    md_content = (
+        "![](/tmp/src/doc-0001-01.png)\n"
+        "![](/tmp/src/doc-0001-02.png)"
+    )
+    src_dir = Path("/tmp/test_hash_src")
+    dst_dir = Path("/tmp/test_hash_dst")
+    src_dir.mkdir(parents=True, exist_ok=True)
+    dst_dir.mkdir(parents=True, exist_ok=True)
+    (src_dir / "doc-0001-01.png").write_bytes(content1)
+    (src_dir / "doc-0001-02.png").write_bytes(content2)
+
+    hash1 = hashlib.sha256(content1).hexdigest()
+    hash2 = hashlib.sha256(content2).hexdigest()
+
+    result = extract_and_relink_images(md_content, str(src_dir), str(dst_dir), sequential=False)
+
+    assert (dst_dir / hash1[:2] / hash1[2:4] / f"{hash1}.png").exists()
+    assert (dst_dir / hash2[:2] / hash2[2:4] / f"{hash2}.png").exists()
+    assert f"assets/{hash1[:2]}/{hash1[2:4]}/{hash1}.png" in result
+    assert f"assets/{hash2[:2]}/{hash2[2:4]}/{hash2}.png" in result
+    assert "image-001" not in result
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `uv run pytest tests/test_image_helper.py::test_extract_and_relink_images_hash_naming -v`
+Expected: FAIL — `extract_and_relink_images` doesn't accept `sequential` keyword
+
+- [ ] **Step 3: Add `sequential` and `ref_subdir` params to `extract_and_relink_images`**
+
+```python
+import hashlib
+
+def extract_and_relink_images(
+    markdown: str,
+    src_dir: str,
+    dst_dir: str,
+    *,
+    sequential: bool = False,
+    ref_subdir: str = "",
+) -> str:
+    src_path = Path(src_dir)
+    dst_path = Path(dst_dir)
+    dst_path.mkdir(parents=True, exist_ok=True)
+
+    img_pattern = re.compile(r'!\[.*?\]\(([^)]+)\)')
+    counter = 1
+    result = markdown
+
+    for match in img_pattern.finditer(markdown):
+        img_path = match.group(1)
+        filename = Path(img_path).name
+        src_file = next(src_path.rglob(filename), None)
+        if src_file is not None and src_file.is_file():
+            if sequential:
+                new_name = f"image-{counter:03d}{src_file.suffix}"
+                dst_file = dst_path / new_name
+                shutil.move(str(src_file), str(dst_file))
+                old_ref = match.group(0)
+                new_ref = f"![](assets/{dst_path.name}/{new_name})"
+                result = result.replace(old_ref, new_ref, 1)
+                counter += 1
+            else:
+                content = src_file.read_bytes()
+                hash_hex = hashlib.sha256(content).hexdigest()
+                hash_prefix = hash_hex[:2]
+                hash_sub = hash_hex[2:4]
+                hash_dir = dst_path / hash_prefix / hash_sub
+                hash_dir.mkdir(parents=True, exist_ok=True)
+                dst_file = hash_dir / f"{hash_hex}{src_file.suffix}"
+                if not dst_file.exists():
+                    shutil.move(str(src_file), str(dst_file))
+                else:
+                    src_file.unlink()
+                old_ref = match.group(0)
+                ref_parts = [p for p in ("assets", ref_subdir, hash_prefix, hash_sub, f"{hash_hex}{src_file.suffix}") if p]
+                new_ref = f"![]({'/'.join(ref_parts)})"
+                result = result.replace(old_ref, new_ref, 1)
+
+    return result
+```
+
+- [ ] **Step 4: Run test to verify it passes**
+
+Run: `uv run pytest tests/test_image_helper.py::test_extract_and_relink_images_hash_naming -v`
+Expected: PASS
+
+- [ ] **Step 5: Write test for deduplication**
+
+```python
+def test_extract_and_relink_images_dedup():
+    content = b"duplicate content"
+    md_content = (
+        "![](/tmp/src_dedup/img-a.png)\n"
+        "![](/tmp/src_dedup/img-b.png)"
+    )
+    src_dir = Path("/tmp/test_dedup_src")
+    dst_dir = Path("/tmp/test_dedup_dst")
+    src_dir.mkdir(parents=True, exist_ok=True)
+    dst_dir.mkdir(parents=True, exist_ok=True)
+    (src_dir / "img-a.png").write_bytes(content)
+    (src_dir / "img-b.png").write_bytes(content)
+
+    hash_hex = hashlib.sha256(content).hexdigest()
+
+    result = extract_and_relink_images(md_content, str(src_dir), str(dst_dir), sequential=False)
+
+    # Only one file on disk
+    hash_file = dst_dir / hash_hex[:2] / hash_hex[2:4] / f"{hash_hex}.png"
+    assert hash_file.exists()
+    # Both refs point to the same hash path
+    expected_ref = f"assets/{hash_hex[:2]}/{hash_hex[2:4]}/{hash_hex}.png"
+    assert result.count(expected_ref) == 2
+```
+
+- [ ] **Step 6: Run dedup test**
+
+Run: `uv run pytest tests/test_image_helper.py::test_extract_and_relink_images_dedup -v`
+Expected: PASS
+
+- [ ] **Step 7: Write test for `ref_subdir` in hash mode**
+
+```python
+def test_extract_and_relink_images_hash_ref_subdir():
+    content = b"some content"
+    md_content = "![](/tmp/src_ref/img.png)"
+    src_dir = Path("/tmp/test_ref_src")
+    dst_dir = Path("/tmp/test_ref_dst")
+    src_dir.mkdir(parents=True, exist_ok=True)
+    dst_dir.mkdir(parents=True, exist_ok=True)
+    (src_dir / "img.png").write_bytes(content)
+
+    hash_hex = hashlib.sha256(content).hexdigest()
+    result = extract_and_relink_images(
+        md_content, str(src_dir), str(dst_dir),
+        sequential=False, ref_subdir="subdir",
+    )
+
+    expected = f"assets/subdir/{hash_hex[:2]}/{hash_hex[2:4]}/{hash_hex}.png"
+    assert expected in result
+```
+
+- [ ] **Step 8: Run ref_subdir test**
+
+Run: `uv run pytest tests/test_image_helper.py::test_extract_and_relink_images_hash_ref_subdir -v`
+Expected: PASS
+
+- [ ] **Step 9: Run all image helper tests**
+
+Run: `uv run pytest tests/test_image_helper.py -v`
+Expected: All 5 tests pass (3 new + 2 existing)
+
+- [ ] **Step 10: Commit**
+
+```bash
+git add src/kbmate_cli/image_helper.py tests/test_image_helper.py
+git commit -m "feat: add hash-based image naming with git-style directory structure"
+```
+
+---
+
+### Task 2: Wire `--assets-seqname` through CLI chain
+
+**Files:**
+- Modify: `src/kbmate_cli/main.py`
+- Test: `tests/test_cli.py`
+
+- [ ] **Step 1: Write failing CLI test for `--assets-seqname`**
+
+```python
+def test_convert_assets_seqname_flag_in_help():
+    result = runner.invoke(app, ["convert", "--help"])
+    assert result.exit_code == 0
+    assert "--assets-seqname" in result.stdout
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `uv run pytest tests/test_cli.py::test_convert_assets_seqname_flag_in_help -v`
+Expected: FAIL — `--assets-seqname` not yet added
+
+- [ ] **Step 3: Add `assets_seqname` parameter to `convert` and `bulk_convert`**
+
+```python
+@app.command()
+def convert(
+    source_file: str = typer.Argument(..., help="Path or URL to the .docx or .pdf file"),
+    output_dir: str = typer.Option("raw", help="Output directory"),
+    assets_seqname: bool = typer.Option(False, "--assets-seqname", help="Use sequential naming for asset images instead of content hash"),
+):
+    ...
+    try:
+        convert_single(src, Path(output_dir), assets_seqname=assets_seqname)
+    ...
+```
+
+```python
+@app.command()
+def bulk_convert(
+    recursive: str = typer.Option(None, "-r", "--recursive", help="Directory to scan recursively"),
+    file_list: str = typer.Option(None, "-f", "--file-list", help="File containing one source per line"),
+    output_dir: str = typer.Option("raw", help="Output directory"),
+    output_layout: str = typer.Option("flat", "--output-layout", help="Output layout: flat or mirror"),
+    assets_seqname: bool = typer.Option(False, "--assets-seqname", help="Use sequential naming for asset images instead of content hash"),
+):
+    ...
+    for src_path in _collect_files_from_dir(root):
+        try:
+            convert_single(src_path, out, layout=output_layout, relative_to=root, assets_seqname=assets_seqname)
+        ...
+    for line in lines:
+        try:
+            convert_single(src, out, layout=output_layout, assets_seqname=assets_seqname)
+        ...
+```
+
+- [ ] **Step 4: Update `convert_single` signature and implementation**
+
+```python
+def convert_single(
+    source_path: Path,
+    output_dir: Path,
+    *,
+    layout: Literal["flat", "mirror"] = "flat",
+    relative_to: Path | None = None,
+    assets_seqname: bool = False,
+) -> None:
+    ...
+    assets_dir = assets_parent / rel / safe_stem
+    converts_dir = output_dir / "converts" / rel
+    converts_dir.mkdir(parents=True, exist_ok=True)
+    assets_dir.mkdir(parents=True, exist_ok=True)
+
+    if assets_seqname:
+        markdown_content = converter(source_path, assets_dir, assets_dir, sequential=True)
+    else:
+        hash_base = assets_parent / rel
+        hash_base.mkdir(parents=True, exist_ok=True)
+        ref_subdir = "" if rel == Path(".") else str(rel)
+        markdown_content = converter(source_path, assets_dir, hash_base, sequential=False, ref_subdir=ref_subdir)
+    ...
+```
+
+- [ ] **Step 5: Update `_convert_pdf` and `_convert_docx` signatures**
+
+```python
+def _convert_pdf(src: Path, src_dir: Path, dst_dir: Path, *, sequential: bool = False, ref_subdir: str = "") -> str:
+    from kbmate_cli.pdf_converter import convert_pdf
+    from kbmate_cli.image_helper import extract_and_relink_images
+    md = convert_pdf(str(src), str(src_dir))
+    return extract_and_relink_images(md, str(src_dir), str(dst_dir), sequential=sequential, ref_subdir=ref_subdir)
+
+
+def _convert_docx(src: Path, src_dir: Path, dst_dir: Path, *, sequential: bool = False, ref_subdir: str = "") -> str:
+    from kbmate_cli.docx_converter import convert_docx
+    from kbmate_cli.image_helper import normalize_image_refs, extract_and_relink_images
+    pandoc_output = src_dir / "pandoc_output"
+    md = convert_docx(str(src), str(pandoc_output))
+    md = normalize_image_refs(md)
+    md = extract_and_relink_images(md, str(pandoc_output), str(dst_dir), sequential=sequential, ref_subdir=ref_subdir)
+    if pandoc_output.exists():
+        import shutil
+        shutil.rmtree(pandoc_output)
+    return md
+```
+
+- [ ] **Step 6: Run test to verify it passes**
+
+Run: `uv run pytest tests/test_cli.py::test_convert_assets_seqname_flag_in_help -v`
+Expected: PASS
+
+- [ ] **Step 7: Add integration test for non-sequential (hash) default**
+
+```python
+@patch.dict("kbmate_cli.main._CONVERTERS", {".pdf": MagicMock(return_value="# mock")})
+def test_bulk_convert_assets_seqname_flag():
+    with tempfile.TemporaryDirectory() as tmp:
+        root = Path(tmp)
+        pdf = root / "a.pdf"
+        pdf.write_text("fake")
+        out = root / "out"
+        result = runner.invoke(app, [
+            "bulk-convert", "-r", str(root),
+            "--output-dir", str(out),
+            "--assets-seqname",
+        ])
+        assert result.exit_code == 0
+
+        # Sequential mode should have per-doc assets subdir
+        assets_dir = out / "assets" / "a"
+        assert assets_dir.exists()
+```
+
+- [ ] **Step 8: Run integration test**
+
+Run: `uv run pytest tests/test_cli.py::test_bulk_convert_assets_seqname_flag -v`
+Expected: PASS
+
+- [ ] **Step 9: Update `test_convert_pdf_with_spaces_in_filename` to pass with hash naming**
+
+The existing test checks for per-doc subdirectory `assets/eigent_README_CN`. With default hash naming, images no longer go to per-doc subdirs. Update the test:
+
+```python
+def test_convert_pdf_with_spaces_in_filename():
+    src = FIXTURE_DIR / "eigent README CN.pdf"
+    out = Path("/tmp/test_cli_spaces_output")
+    result = runner.invoke(app, ["convert", str(src), "--output-dir", str(out)])
+    assert result.exit_code == 0, f"Failed with output: {result.output}"
+    # In default hash mode, images are under hash subdirs inside assets/
+    assert (out / "assets").exists()
+    # At least one hash directory level exists
+    hash_dirs = list((out / "assets").glob("*"))
+    assert len(hash_dirs) > 0, f"No hash dirs found in {out / 'assets'}"
+```
+
+- [ ] **Step 10: Run all existing tests**
+
+Run: `uv run pytest tests/test_cli.py tests/test_image_helper.py -v`
+Expected: All tests pass. (If any fail, fix them.)
+
+- [ ] **Step 11: Run full test suite**
+
+Run: `uv run pytest -v`
+Expected: All tests pass, coverage >= 85%
+
+- [ ] **Step 12: Commit**
+
+```bash
+git add src/kbmate_cli/main.py src/kbmate_cli/image_helper.py tests/
+git commit -m "feat: add --assets-seqname flag, default to hash-based image naming"
+```
diff --git a/docs/superpowers/specs/2026-05-28-assets-hash-naming-design.md b/docs/superpowers/specs/2026-05-28-assets-hash-naming-design.md
new file mode 100644
index 0000000..773640f
--- /dev/null
+++ b/docs/superpowers/specs/2026-05-28-assets-hash-naming-design.md
@@ -0,0 +1,144 @@
+# Assets Hash Naming Design
+
+> **Goal:** Change the default image naming strategy in kbmate-cli from sequential (`image-001.png`) to content-addressed (SHA256 hash), with a `--assets-seqname` flag to opt into the old sequential behavior.
+
+**Architecture:** The change is contained in `image_helper.py`'s `extract_and_relink_images()`, which receives a new `sequential` boolean parameter. The hash-naming path computes SHA256 of file content, creates a two-level directory structure (`hash[:2]/hash[2:4]/fullhash.ext`), and moves the file accordingly. Markdown references are updated to match the new paths. CLI entry points (`convert`, `bulk_convert`) expose a `--assets-seqname` flag that propagates down to the converter.
+
+**Tech Stack:** Python 3.12+, `hashlib` (stdlib), `shutil` (stdlib)
+
+---
+
+## Default behavior (hash naming)
+
+When `--assets-seqname` is NOT set:
+
+1. For each extracted image, read its content and compute SHA256 digest (full 64-character hex string)
+2. Create a two-level subdirectory under the assets directory: `hash[:2] / hash[2:4] /`
+3. Move the image file to `hash[:2] / hash[2:4] / fullhash.suffix`
+4. If a file with the same hash already exists at the target path, skip moving (deduplication)
+5. Update the markdown image reference to point to the correct relative path: `![](assets/hash[:2]/hash[2:4]/fullhash.suffix)`
+
+### Example
+
+Given an image `doc-0001-01.png` with SHA256 `a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6`:
+
+- Location: `assets/a1/b2/a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6.png`
+- Markdown reference: `![](assets/a1/b2/a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6.png)`
+
+### Deduplication
+
+If two images in the source have identical content, they will resolve to the same hash. The second occurrence will detect the target file already exists and skip the `shutil.move`, but still update the markdown reference to point to the same path.
+
+### Directory structure change from current
+
+Current (sequential):
+```
+assets/<docname>/
+├── image-001.png
+├── image-002.png
+```
+
+New (hash, default):
+```
+assets/
+├── a1/
+│   └── b2/
+│       └── a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6.png
+└── ff/
+    └── ee/
+        └── ffee...png
+```
+
+> **Note:** The per-document subdirectory (`<docname>/`) is removed in hash mode — all images share the flat two-level hash structure under `assets/`.
+
+---
+
+## `--assets-seqname` mode (sequential, current behavior)
+
+When `--assets-seqname` is set:
+
+1. Uses the current `image-001.png`, `image-002.png`, ... naming
+2. Images are placed in `assets/<output_layout_rel>/<docname>/` (unchanged from current)
+3. Behaves identically to today's `extract_and_relink_images()`
+
+---
+
+## CLI changes
+
+### `convert` command
+
+```python
+def convert(
+    source_file: str = typer.Argument(...),
+    output_dir: str = typer.Option("raw"),
+    assets_seqname: bool = typer.Option(False, "--assets-seqname", help="Use sequential naming for asset images"),
+):
+```
+
+### `bulk_convert` command
+
+```python
+def bulk_convert(
+    recursive: str = typer.Option(None, "-r", "--recursive"),
+    file_list: str = typer.Option(None, "-f", "--file-list"),
+    output_dir: str = typer.Option("raw"),
+    output_layout: str = typer.Option("flat", "--output-layout"),
+    assets_seqname: bool = typer.Option(False, "--assets-seqname", help="Use sequential naming for asset images"),
+):
+```
+
+### Propagation path
+
+```
+convert / bulk_convert
+  └─ assets_seqname (CLI parameter)
+      └─ convert_single(assets_seqname=...)
+          └─ _convert_pdf(src, assets_dir, assets_seqname)
+             └─ extract_and_relink_images(md, src_dir, dst_dir, sequential=...)
+          └─ _convert_docx(src, assets_dir, assets_seqname)
+             └─ extract_and_relink_images(md, src_dir, dst_dir, sequential=...)
+```
+
+---
+
+## Files to modify
+
+| File | Change |
+|------|--------|
+| `src/kbmate_cli/main.py` | Add `assets_seqname` to `convert`, `bulk_convert`, `convert_single`, `_convert_pdf`, `_convert_docx` |
+| `src/kbmate_cli/image_helper.py` | Add `sequential: bool = False` to `extract_and_relink_images()`; implement hash-naming path |
+| `tests/test_image_helper.py` | Add test cases for hash naming with directory structure |
+| `tests/test_cli.py` | Add CLI integration tests (if applicable) |
+
+---
+
+## `image_helper.py` — new `extract_and_relink_images` signature
+
+```python
+def extract_and_relink_images(
+    markdown: str,
+    src_dir: str,
+    dst_dir: str,
+    *,
+    sequential: bool = False,
+) -> str:
+```
+
+When `sequential=True`:
+- Use current counter-based naming (`image-001.png`)
+
+When `sequential=False` (default):
+- For each image, compute SHA256 of file contents
+- Construct path: `dst_dir / hash[:2] / hash[2:4] / hash{suffix}`
+- Handle deduplication (skip move if target exists)
+- Update markdown references accordingly
+
+---
+
+## Testing
+
+- Test hash naming produces correct directory structure: `hash[:2] / hash[2:4] / fullhash.png`
+- Test deduplication: two identical images → single file on disk, correct references
+- Test `--assets-seqname` flag restores sequential behavior (existing tests still pass)
+- Test hash collision handling
+- Test markdown refs are correctly rewritten

From 36eaed0b3808614f17ebd166bea4dd7014e1a6f1 Mon Sep 17 00:00:00 2001
From: Michael Ding <yandy.ding@gmail.com>
Date: Thu, 28 May 2026 13:16:30 +0800
Subject: [PATCH 4/5] chore: bump version to 0.4.0

---
 pyproject.toml | 2 +-
 uv.lock        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 85774b1..5497f1b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "kbmate-cli"
-version = "0.3.0"
+version = "0.4.0"
 description = "CLI tools for knowledge base management"
 readme = "README.md"
 requires-python = ">=3.12"
diff --git a/uv.lock b/uv.lock
index 19855c5..299a6fd 100644
--- a/uv.lock
+++ b/uv.lock
@@ -135,7 +135,7 @@ wheels = [
 
 [[package]]
 name = "kbmate-cli"
-version = "0.3.0"
+version = "0.4.0"
 source = { editable = "." }
 dependencies = [
     { name = "pymupdf4llm" },

From 147f9121102e94afa9a3efa8245fd30593395114 Mon Sep 17 00:00:00 2001
From: Michael Ding <yandy.ding@gmail.com>
Date: Thu, 28 May 2026 13:43:11 +0800
Subject: [PATCH 5/5] fix: rm valueless test case

---
 tests/test_cli.py | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/tests/test_cli.py b/tests/test_cli.py
index 8d753f0..dfda1a3 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -36,18 +36,6 @@ def test_convert_lunwen_pdf():
     assert result.exit_code == 0
 
 
-def test_convert_assets_seqname_flag_in_help():
-    result = runner.invoke(app, ["convert", "--help"])
-    assert result.exit_code == 0
-    assert "--assets-seqname" in result.stdout
-
-
-def test_bulk_convert_assets_seqname_flag_in_help():
-    result = runner.invoke(app, ["bulk-convert", "--help"])
-    assert result.exit_code == 0
-    assert "--assets-seqname" in result.stdout
-
-
 @patch.dict("kbmate_cli.main._CONVERTERS", {".pdf": MagicMock(return_value="# mock")})
 def test_bulk_convert_assets_seqname_flag():
     with tempfile.TemporaryDirectory() as tmp: