Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
237 changes: 184 additions & 53 deletions semantic_code_intelligence/cli/commands/init_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,15 @@
from semantic_code_intelligence.config.settings import (
AppConfig,
init_project,
load_config,
save_config,
)
from semantic_code_intelligence.embeddings.model_registry import (
CLI_PROFILE_CHOICES,
CORE_PROFILES,
MODEL_PROFILES,
ModelProfile,
PROFILE_ALIASES,
recommend_profile_for_ram,
resolve_profile,
)
Expand All @@ -30,6 +35,9 @@
print_success,
print_warning,
)
from rich.console import Console
from rich.panel import Panel
from rich.table import Table

logger = get_logger("cli.init")

Expand Down Expand Up @@ -102,8 +110,14 @@ def _generate_vscode_mcp_config(root: Path) -> bool:
"Size aliases (small/base/large) and named aliases (default/quality/code) are supported."
),
)
@click.option(
"--interactive/--no-interactive",
"interactive",
default=False,
help="Launch the interactive installer to choose the embedding model and batch size.",
)
@click.pass_context
def init_cmd(ctx: click.Context, path: str, auto_index: bool, setup_vscode: bool, profile_name: str | None) -> None:
def init_cmd(ctx: click.Context, path: str, auto_index: bool, setup_vscode: bool, profile_name: str | None, interactive: bool) -> None:
"""Initialize a project for semantic code indexing.

Creates a .codexa/ directory with default configuration and an empty index.
Expand All @@ -117,27 +131,37 @@ def init_cmd(ctx: click.Context, path: str, auto_index: bool, setup_vscode: bool
"""
root = Path(path).resolve()

# Check if already initialized
config_dir = AppConfig.config_dir(root)
if config_dir.exists():
print_info(f"Project already initialized at {root}")
print_info(f"Config directory: {config_dir}")
# Still allow --vscode and --index on existing projects
if setup_vscode:
if _generate_vscode_mcp_config(root):
print_success("VS Code MCP config written to .vscode/settings.json")
else:
print_info("VS Code MCP config already exists")
if auto_index:
_run_index(root)
return

try:
config, config_path = init_project(root)
print_success(f"Initialized project at {root}")
print_info(f"Config file: {config_path}")
print_info(f"Index directory: {AppConfig.index_dir(root)}")
logger.debug("Default config: %s", config.model_dump())
if config_dir.exists():
if not interactive:
print_info(f"Project already initialized at {root}")
print_info(f"Config directory: {config_dir}")
# Still allow --vscode and --index on existing projects
if setup_vscode:
if _generate_vscode_mcp_config(root):
print_success("VS Code MCP config written to .vscode/settings.json")
else:
print_info("VS Code MCP config already exists")
if auto_index:
_run_index(root)
return

try:
config = load_config(root)
except (json.JSONDecodeError, ValueError, OSError) as e:
print_error("Failed to read existing .codexa/config.json. Please fix or delete it and rerun 'codexa init'.")
print_error(f"Details: {e}")
ctx.exit(1)
return
print_info(f"Project already initialized at {root}")
print_info("Launching interactive installer to update configuration.")
Comment thread
M9nx marked this conversation as resolved.
else:
Comment thread
M9nx marked this conversation as resolved.
config, config_path = init_project(root)
print_success(f"Initialized project at {root}")
print_info(f"Config file: {config_path}")
print_info(f"Index directory: {AppConfig.index_dir(root)}")
logger.debug("Default config: %s", config.model_dump())
except OSError as e:
print_error(f"Failed to initialize project: {e}")
ctx.exit(1)
Expand All @@ -149,48 +173,63 @@ def init_cmd(ctx: click.Context, path: str, auto_index: bool, setup_vscode: bool
available_memory / BYTES_PER_GB if available_memory is not None else None
)

# Apply model profile (explicit or RAM-auto-detected)
profile = None
recommended_profile = None
if profile_name:
profile = resolve_profile(profile_name)
recommended_profile = resolve_profile(profile_name)
elif available_gb is not None:
profile = recommend_profile_for_ram(available_gb)
print_info(f"Detected {available_gb:.1f} GB available RAM → using '{profile.name}' profile ({profile.label})")

profile_changed = False
if profile:
if config.embedding.model_name != profile.model_name:
config.embedding.model_name = profile.model_name
profile_changed = True
print_success(f"Model profile: {profile.label} → {profile.model_name}")
print_info(f" {profile.description}")
recommended_profile = recommend_profile_for_ram(available_gb)

recommended_batch_size = recommend_batch_size(available_memory, logical_cpu_count)
batch_changed = recommended_batch_size != config.embedding.batch_size
if batch_changed:
config.embedding.batch_size = recommended_batch_size

resource_parts: list[str] = []
if available_gb is not None:
resource_parts.append(f"{available_gb:.1f} GB RAM")
if logical_cpu_count is not None:
core_label = "CPU core" if logical_cpu_count == 1 else "CPU cores"
resource_parts.append(f"{logical_cpu_count} {core_label}")

batch_message_prefix = (
f"Embedding batch size {'updated' if batch_changed else 'kept'} "
f"at {config.embedding.batch_size}"
)
if resource_parts:
print_info(
f"{batch_message_prefix} (based on {', '.join(resource_parts)})"
if interactive:
profile_changed, batch_changed = _run_interactive_installer(
config=config,
available_gb=available_gb,
cpu_count=logical_cpu_count,
default_profile=recommended_profile or MODEL_PROFILES["balanced"],
recommended_batch_size=recommended_batch_size,
)
should_save = profile_changed or batch_changed
else:
print_info(
f"{batch_message_prefix} (using default recommendation)"
# Apply model profile (explicit or RAM-auto-detected)
profile = recommended_profile
profile_changed = False
if profile:
if profile_name is None and available_gb is not None:
print_info(f"Detected {available_gb:.1f} GB available RAM → using '{profile.name}' profile ({profile.label})")

if config.embedding.model_name != profile.model_name:
config.embedding.model_name = profile.model_name
profile_changed = True
print_success(f"Model profile: {profile.label} → {profile.model_name}")
print_info(f" {profile.description}")

batch_changed = recommended_batch_size != config.embedding.batch_size
if batch_changed:
config.embedding.batch_size = recommended_batch_size

resource_parts: list[str] = []
if available_gb is not None:
resource_parts.append(f"{available_gb:.1f} GB RAM")
if logical_cpu_count is not None:
core_label = "CPU core" if logical_cpu_count == 1 else "CPU cores"
resource_parts.append(f"{logical_cpu_count} {core_label}")

batch_message_prefix = (
f"Embedding batch size {'updated' if batch_changed else 'kept'} "
f"at {config.embedding.batch_size}"
)
if resource_parts:
print_info(
f"{batch_message_prefix} (based on {', '.join(resource_parts)})"
)
else:
print_info(
f"{batch_message_prefix} (using default recommendation)"
)

should_save = profile_changed or batch_changed

should_save = profile_changed or batch_changed
if should_save:
save_config(config, root)

Expand All @@ -210,6 +249,98 @@ def init_cmd(ctx: click.Context, path: str, auto_index: bool, setup_vscode: bool
print_info(" .codexaignore — Exclude secrets or generated files from indexing")


def _run_interactive_installer(
    config: AppConfig,
    available_gb: float | None,
    cpu_count: int | None,
    default_profile: ModelProfile,
    recommended_batch_size: int,
) -> tuple[bool, bool]:
    """Launch a text-based interactive installer for model and batch settings.

    Prints a summary of detected resources and a table of the core embedding
    profiles, then prompts for an embedding profile and a batch size. The
    chosen values are written onto ``config.embedding`` in place; saving the
    config is left to the caller.

    Args:
        config: Configuration object whose ``embedding.model_name`` and
            ``embedding.batch_size`` are updated with the user's answers.
        available_gb: Detected available RAM in GB, or ``None`` if unknown.
        cpu_count: Detected logical CPU count, or ``None`` if unknown.
        default_profile: Profile offered as the default answer to the
            profile prompt.
        recommended_batch_size: Batch size shown as the recommendation and
            offered (clamped to the accepted range) as the prompt default.

    Returns:
        ``(profile_changed, batch_changed)`` — whether the model name and the
        batch size now differ from what ``config`` held on entry.

    Raises:
        click.ClickException: If the selected profile key cannot be resolved.
    """
    min_batch_size, max_batch_size = 1, 1024

    console = Console()
    console.print()
    console.print(
        Panel.fit(
            "[bold cyan]CodexA Interactive Installer[/bold cyan]\n"
            "Configure embedding defaults for your project.",
            border_style="cyan",
        )
    )

    # Resource summary and suggestions
    resource_lines: list[str] = []
    if available_gb is not None:
        resource_lines.append(f"[green]{available_gb:.1f} GB[/green] available RAM detected")
    if cpu_count is not None:
        # Same singular/plural rule as the non-interactive resource summary.
        core_label = "CPU core" if cpu_count == 1 else "CPU cores"
        resource_lines.append(f"[green]{cpu_count} {core_label}[/green] detected")

    if resource_lines:
        console.print(" • ".join(resource_lines))
        console.print(f"Suggested profile: [bold]{default_profile.label}[/bold]")
        console.print(f"Suggested batch size: [bold]{recommended_batch_size}[/bold]")
    else:
        console.print("System resources could not be detected; keeping safe defaults.")

    # Show model options
    table = Table(title="Embedding Profiles", show_lines=True)
    table.add_column("Key", justify="center", style="cyan", no_wrap=True)
    table.add_column("Label")
    table.add_column("Model")
    table.add_column("Description")
    table.add_column("Min RAM (GB)", justify="right")
    for key in CORE_PROFILES:
        profile = MODEL_PROFILES[key]
        table.add_row(
            profile.name,
            profile.label,
            profile.model_name,
            profile.description,
            f"{profile.min_ram_gb:.1f}",
        )
    console.print(table)

    chosen_profile_key = click.prompt(
        "Select embedding profile",
        type=click.Choice(CLI_PROFILE_CHOICES, case_sensitive=False),
        default=default_profile.name,
        show_choices=False,
    )
    chosen_profile = resolve_profile(chosen_profile_key)
    if chosen_profile is None:
        valid_profiles = sorted(set(MODEL_PROFILES) | set(PROFILE_ALIASES))
        raise click.ClickException(
            f"Profile '{chosen_profile_key}' could not be resolved. "
            f"Valid profiles are: {', '.join(valid_profiles)}."
        )

    profile_changed = config.embedding.model_name != chosen_profile.model_name
    if profile_changed:
        config.embedding.model_name = chosen_profile.model_name

    console.print()
    console.print(
        Panel.fit(
            "[bold]Batch size[/bold] controls how many chunks are embedded at once.\n"
            f"Recommended: [cyan]{recommended_batch_size}[/cyan] (based on detection).",
            border_style="cyan",
        )
    )

    # The prompt validates the accepted default against IntRange as well, so
    # an out-of-range recommendation would be rejected every time the user
    # just presses Enter. Clamp the default into the accepted range.
    prompt_default = min(max(recommended_batch_size, min_batch_size), max_batch_size)
    batch_input = click.prompt(
        "Embedding batch size",
        default=prompt_default,
        type=click.IntRange(min_batch_size, max_batch_size),
        show_default=True,
    )
    batch_changed = batch_input != config.embedding.batch_size
    config.embedding.batch_size = batch_input

    console.print()
    console.print(
        Panel.fit(
            f"Using profile [green]{chosen_profile.label}[/green] ({chosen_profile.model_name}) "
            f"with batch size [green]{config.embedding.batch_size}[/green].",
            border_style="green",
        )
    )

    return profile_changed, batch_changed


def _run_index(root: Path) -> None:
"""Run indexing as part of init."""
from semantic_code_intelligence.services.indexing_service import index_project
Expand Down
74 changes: 74 additions & 0 deletions semantic_code_intelligence/tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,80 @@ def test_init_saves_recommended_batch_size(self, runner: CliRunner, tmp_path: Pa
# Profile for ~3GB RAM should be precise according to registry thresholds
assert config["embedding"]["model_name"] == "jinaai/jina-embeddings-v2-base-code"

def test_init_interactive_applies_selections(self, runner: CliRunner, tmp_path: Path, monkeypatch: pytest.MonkeyPatch):
    """A fresh ``init --interactive`` run saves the prompted profile and batch size."""
    # Pin resource detection so the installer's suggested defaults are deterministic.
    for target, stub in (
        ("semantic_code_intelligence.cli.commands.init_cmd._get_available_memory_bytes", lambda: 5 * BYTES_PER_GB),
        ("semantic_code_intelligence.cli.commands.init_cmd._get_cpu_count", lambda: 4),
    ):
        monkeypatch.setattr(target, stub)

    # Answers in prompt order: embedding profile, then batch size.
    outcome = runner.invoke(cli, ["init", str(tmp_path), "--interactive"], input="fast\n24\n")

    assert outcome.exit_code == 0
    assert "interactive installer" in outcome.output.lower()

    saved_path = tmp_path / ".codexa" / "config.json"
    embedding = json.loads(saved_path.read_text(encoding="utf-8"))["embedding"]
    assert embedding["model_name"] == MODEL_PROFILES["fast"].model_name
    assert embedding["batch_size"] == 24

def test_init_interactive_updates_existing_project(self, runner: CliRunner, tmp_path: Path, monkeypatch: pytest.MonkeyPatch):
    """Re-running ``init --interactive`` on an initialized project rewrites its config."""
    project_dir = str(tmp_path)

    # Fixed resource detection for both invocations.
    for target, stub in (
        ("semantic_code_intelligence.cli.commands.init_cmd._get_available_memory_bytes", lambda: 2 * BYTES_PER_GB),
        ("semantic_code_intelligence.cli.commands.init_cmd._get_cpu_count", lambda: 2),
    ):
        monkeypatch.setattr(target, stub)

    # First pass: plain init creates the project with defaults.
    runner.invoke(cli, ["init", project_dir])

    # Second pass: interactive answers (profile, then batch size) replace them.
    outcome = runner.invoke(cli, ["init", project_dir, "--interactive"], input="precise\n16\n")
    assert outcome.exit_code == 0

    saved_path = tmp_path / ".codexa" / "config.json"
    embedding = json.loads(saved_path.read_text(encoding="utf-8"))["embedding"]
    assert embedding["model_name"] == MODEL_PROFILES["precise"].model_name
    assert embedding["batch_size"] == 16

def test_init_interactive_invalid_config(self, runner: CliRunner, tmp_path: Path):
    """Interactive init on a project with unparsable config fails with a clear message."""
    project_dir = str(tmp_path)
    runner.invoke(cli, ["init", project_dir])

    # Overwrite the generated config with text that is not valid JSON.
    (tmp_path / ".codexa" / "config.json").write_text("{ invalid json", encoding="utf-8")

    outcome = runner.invoke(cli, ["init", project_dir, "--interactive"])

    assert outcome.exit_code != 0
    assert "failed to read existing .codexa/config.json" in outcome.output.lower()

def test_init_interactive_config_permission_error(self, runner: CliRunner, tmp_path: Path, monkeypatch: pytest.MonkeyPatch):
    """Interactive init reports a read failure when loading the config raises ``OSError``."""
    project_dir = str(tmp_path)
    runner.invoke(cli, ["init", project_dir])

    def _deny_access(*_args, **_kwargs):
        # Stand-in for load_config that always fails with EACCES.
        raise OSError(errno.EACCES, "permission denied")

    monkeypatch.setattr("semantic_code_intelligence.cli.commands.init_cmd.load_config", _deny_access)

    outcome = runner.invoke(cli, ["init", project_dir, "--interactive"])

    assert outcome.exit_code != 0
    assert "failed to read existing .codexa/config.json" in outcome.output.lower()


class TestIndexCommand:
"""Tests for the index command."""
Expand Down
Loading