diff --git a/semantic_code_intelligence/cli/commands/init_cmd.py b/semantic_code_intelligence/cli/commands/init_cmd.py index d582bd8..60e9a31 100644 --- a/semantic_code_intelligence/cli/commands/init_cmd.py +++ b/semantic_code_intelligence/cli/commands/init_cmd.py @@ -10,10 +10,15 @@ from semantic_code_intelligence.config.settings import ( AppConfig, init_project, + load_config, save_config, ) from semantic_code_intelligence.embeddings.model_registry import ( CLI_PROFILE_CHOICES, + CORE_PROFILES, + MODEL_PROFILES, + ModelProfile, + PROFILE_ALIASES, recommend_profile_for_ram, resolve_profile, ) @@ -30,6 +35,9 @@ print_success, print_warning, ) +from rich.console import Console +from rich.panel import Panel +from rich.table import Table logger = get_logger("cli.init") @@ -102,8 +110,14 @@ def _generate_vscode_mcp_config(root: Path) -> bool: "Size aliases (small/base/large) and named aliases (default/quality/code) are supported." ), ) +@click.option( + "--interactive/--no-interactive", + "interactive", + default=False, + help="Launch the interactive installer to choose the embedding model and batch size.", +) @click.pass_context -def init_cmd(ctx: click.Context, path: str, auto_index: bool, setup_vscode: bool, profile_name: str | None) -> None: +def init_cmd(ctx: click.Context, path: str, auto_index: bool, setup_vscode: bool, profile_name: str | None, interactive: bool) -> None: """Initialize a project for semantic code indexing. Creates a .codexa/ directory with default configuration and an empty index. @@ -117,27 +131,37 @@ def init_cmd(ctx: click.Context, path: str, auto_index: bool, setup_vscode: bool """ root = Path(path).resolve() - # Check if already initialized config_dir = AppConfig.config_dir(root) - if config_dir.exists(): - print_info(f"Project already initialized at {root}") - print_info(f"Config directory: {config_dir}") - # Still allow --vscode and --index on existing projects - if setup_vscode: - if _generate_vscode_mcp_config(root): - print_success("VS Code MCP config written to .vscode/settings.json") - else: - print_info("VS Code MCP config already exists") - if auto_index: - _run_index(root) - return - try: - config, config_path = init_project(root) - print_success(f"Initialized project at {root}") - print_info(f"Config file: {config_path}") - print_info(f"Index directory: {AppConfig.index_dir(root)}") - logger.debug("Default config: %s", config.model_dump()) + if config_dir.exists(): + if not interactive: + print_info(f"Project already initialized at {root}") + print_info(f"Config directory: {config_dir}") + # Still allow --vscode and --index on existing projects + if setup_vscode: + if _generate_vscode_mcp_config(root): + print_success("VS Code MCP config written to .vscode/settings.json") + else: + print_info("VS Code MCP config already exists") + if auto_index: + _run_index(root) + return + + try: + config = load_config(root) + except (json.JSONDecodeError, ValueError, OSError) as e: + print_error("Failed to read existing .codexa/config.json. Please fix or delete it and rerun 'codexa init'.") + print_error(f"Details: {e}") + ctx.exit(1) + return + print_info(f"Project already initialized at {root}") + print_info("Launching interactive installer to update configuration.") + else: + config, config_path = init_project(root) + print_success(f"Initialized project at {root}") + print_info(f"Config file: {config_path}") + print_info(f"Index directory: {AppConfig.index_dir(root)}") + logger.debug("Default config: %s", config.model_dump()) except OSError as e: print_error(f"Failed to initialize project: {e}") ctx.exit(1) @@ -149,48 +173,63 @@ def init_cmd(ctx: click.Context, path: str, auto_index: bool, setup_vscode: bool available_memory / BYTES_PER_GB if available_memory is not None else None ) - # Apply model profile (explicit or RAM-auto-detected) - profile = None + recommended_profile = None if profile_name: - profile = resolve_profile(profile_name) + recommended_profile = resolve_profile(profile_name) elif available_gb is not None: - profile = recommend_profile_for_ram(available_gb) - print_info(f"Detected {available_gb:.1f} GB available RAM → using '{profile.name}' profile ({profile.label})") - - profile_changed = False - if profile: - if config.embedding.model_name != profile.model_name: - config.embedding.model_name = profile.model_name - profile_changed = True - print_success(f"Model profile: {profile.label} → {profile.model_name}") - print_info(f" {profile.description}") + recommended_profile = recommend_profile_for_ram(available_gb) recommended_batch_size = recommend_batch_size(available_memory, logical_cpu_count) - batch_changed = recommended_batch_size != config.embedding.batch_size - if batch_changed: - config.embedding.batch_size = recommended_batch_size - resource_parts: list[str] = [] - if available_gb is not None: - resource_parts.append(f"{available_gb:.1f} GB RAM") - if logical_cpu_count is not None: - core_label = "CPU core" if logical_cpu_count == 1 else "CPU cores" - resource_parts.append(f"{logical_cpu_count} {core_label}") - - batch_message_prefix = ( - f"Embedding batch size {'updated' if batch_changed else 'kept'} " - f"at {config.embedding.batch_size}" - ) - if resource_parts: - print_info( - f"{batch_message_prefix} (based on {', '.join(resource_parts)})" + if interactive: + profile_changed, batch_changed = _run_interactive_installer( + config=config, + available_gb=available_gb, + cpu_count=logical_cpu_count, + default_profile=recommended_profile or MODEL_PROFILES["balanced"], + recommended_batch_size=recommended_batch_size, ) + should_save = profile_changed or batch_changed else: - print_info( - f"{batch_message_prefix} (using default recommendation)" + # Apply model profile (explicit or RAM-auto-detected) + profile = recommended_profile + profile_changed = False + if profile: + if profile_name is None and available_gb is not None: + print_info(f"Detected {available_gb:.1f} GB available RAM → using '{profile.name}' profile ({profile.label})") + + if config.embedding.model_name != profile.model_name: + config.embedding.model_name = profile.model_name + profile_changed = True + print_success(f"Model profile: {profile.label} → {profile.model_name}") + print_info(f" {profile.description}") + + batch_changed = recommended_batch_size != config.embedding.batch_size + if batch_changed: + config.embedding.batch_size = recommended_batch_size + + resource_parts: list[str] = [] + if available_gb is not None: + resource_parts.append(f"{available_gb:.1f} GB RAM") + if logical_cpu_count is not None: + core_label = "CPU core" if logical_cpu_count == 1 else "CPU cores" + resource_parts.append(f"{logical_cpu_count} {core_label}") + + batch_message_prefix = ( + f"Embedding batch size {'updated' if batch_changed else 'kept'} " + f"at {config.embedding.batch_size}" ) + if resource_parts: + print_info( + f"{batch_message_prefix} (based on {', '.join(resource_parts)})" + ) + else: + print_info( + f"{batch_message_prefix} (using default recommendation)" + ) + + should_save = profile_changed or batch_changed - should_save = profile_changed or batch_changed if should_save: save_config(config, root) @@ -210,6 +249,98 @@ def init_cmd(ctx: click.Context, path: str, auto_index: bool, setup_vscode: bool print_info(" .codexaignore — Exclude secrets or generated files from indexing") +def _run_interactive_installer( + config: AppConfig, + available_gb: float | None, + cpu_count: int | None, + default_profile: ModelProfile, + recommended_batch_size: int, +) -> tuple[bool, bool]: + """Launch a text-based interactive installer for model and batch settings.""" + console = Console() + console.print() + console.print(Panel.fit("[bold cyan]CodexA Interactive Installer[/bold cyan]\nConfigure embedding defaults for your project.", border_style="cyan")) + + # Resource summary and suggestions + resource_lines: list[str] = [] + if available_gb is not None: + resource_lines.append(f"[green]{available_gb:.1f} GB[/green] available RAM detected") + if cpu_count is not None: + resource_lines.append(f"[green]{cpu_count} CPU cores[/green] detected") + if resource_lines: + console.print(" • ".join(resource_lines)) + console.print(f"Suggested profile: [bold]{default_profile.label}[/bold]") + console.print(f"Suggested batch size: [bold]{recommended_batch_size}[/bold]") + else: + console.print("System resources could not be detected; keeping safe defaults.") + + # Show model options + table = Table(title="Embedding Profiles", show_lines=True) + table.add_column("Key", justify="center", style="cyan", no_wrap=True) + table.add_column("Label") + table.add_column("Model") + table.add_column("Description") + table.add_column("Min RAM (GB)", justify="right") + for key in CORE_PROFILES: + profile = MODEL_PROFILES[key] + table.add_row( + profile.name, + profile.label, + profile.model_name, + profile.description, + f"{profile.min_ram_gb:.1f}", + ) + console.print(table) + + chosen_profile_key = click.prompt( + "Select embedding profile", + type=click.Choice(CLI_PROFILE_CHOICES, case_sensitive=False), + default=default_profile.name, + show_choices=False, + ) + chosen_profile = resolve_profile(chosen_profile_key) + if chosen_profile is None: + valid_profiles = sorted(set(MODEL_PROFILES.keys()) | set(PROFILE_ALIASES.keys())) + raise click.ClickException( + f"Profile '{chosen_profile_key}' could not be resolved. " + f"Valid profiles are: {', '.join(valid_profiles)}." + ) + + profile_changed = False + if config.embedding.model_name != chosen_profile.model_name: + config.embedding.model_name = chosen_profile.model_name + profile_changed = True + + console.print() + console.print( + Panel.fit( + f"[bold]Batch size[/bold] controls how many chunks are embedded at once.\n" + f"Recommended: [cyan]{recommended_batch_size}[/cyan] (based on detection).", + border_style="cyan", + ) + ) + + batch_input = click.prompt( + "Embedding batch size", + default=recommended_batch_size, + type=click.IntRange(1, 1024), + show_default=True, + ) + batch_changed = batch_input != config.embedding.batch_size + config.embedding.batch_size = batch_input + + console.print() + console.print( + Panel.fit( + f"Using profile [green]{chosen_profile.label}[/green] ({chosen_profile.model_name}) " + f"with batch size [green]{config.embedding.batch_size}[/green].", + border_style="green", + ) + ) + + return profile_changed, batch_changed + + def _run_index(root: Path) -> None: """Run indexing as part of init.""" from semantic_code_intelligence.services.indexing_service import index_project diff --git a/semantic_code_intelligence/tests/test_cli.py b/semantic_code_intelligence/tests/test_cli.py index 3b24304..cc9eda4 100644 --- a/semantic_code_intelligence/tests/test_cli.py +++ b/semantic_code_intelligence/tests/test_cli.py @@ -110,6 +110,80 @@ def test_init_saves_recommended_batch_size(self, runner: CliRunner, tmp_path: Pa # Profile for ~3GB RAM should be precise according to registry thresholds assert config["embedding"]["model_name"] == "jinaai/jina-embeddings-v2-base-code" + def test_init_interactive_applies_selections(self, runner: CliRunner, tmp_path: Path, monkeypatch: pytest.MonkeyPatch): + # Provide stable detection so defaults are deterministic + monkeypatch.setattr( + "semantic_code_intelligence.cli.commands.init_cmd._get_available_memory_bytes", + lambda: 5 * BYTES_PER_GB, + ) + monkeypatch.setattr( + "semantic_code_intelligence.cli.commands.init_cmd._get_cpu_count", + lambda: 4, + ) + + result = runner.invoke( + cli, + ["init", str(tmp_path), "--interactive"], + input="fast\n24\n", + ) + assert result.exit_code == 0 + output = result.output.lower() + assert "interactive installer" in output + + config = json.loads((tmp_path / ".codexa" / "config.json").read_text(encoding="utf-8")) + assert config["embedding"]["model_name"] == MODEL_PROFILES["fast"].model_name + assert config["embedding"]["batch_size"] == 24 + + def test_init_interactive_updates_existing_project(self, runner: CliRunner, tmp_path: Path, monkeypatch: pytest.MonkeyPatch): + # Initial setup with defaults + monkeypatch.setattr( + "semantic_code_intelligence.cli.commands.init_cmd._get_available_memory_bytes", + lambda: 2 * BYTES_PER_GB, + ) + monkeypatch.setattr( + "semantic_code_intelligence.cli.commands.init_cmd._get_cpu_count", + lambda: 2, + ) + runner.invoke(cli, ["init", str(tmp_path)]) + + # Run interactive to change profile/batch + result = runner.invoke( + cli, + ["init", str(tmp_path), "--interactive"], + input="precise\n16\n", + ) + assert result.exit_code == 0 + + config = json.loads((tmp_path / ".codexa" / "config.json").read_text(encoding="utf-8")) + assert config["embedding"]["model_name"] == MODEL_PROFILES["precise"].model_name + assert config["embedding"]["batch_size"] == 16 + + def test_init_interactive_invalid_config(self, runner: CliRunner, tmp_path: Path): + runner.invoke(cli, ["init", str(tmp_path)]) + config_path = tmp_path / ".codexa" / "config.json" + config_path.write_text("{ invalid json", encoding="utf-8") + + result = runner.invoke(cli, ["init", str(tmp_path), "--interactive"]) + + assert result.exit_code != 0 + assert "failed to read existing .codexa/config.json" in result.output.lower() + + def test_init_interactive_config_permission_error(self, runner: CliRunner, tmp_path: Path, monkeypatch: pytest.MonkeyPatch): + runner.invoke(cli, ["init", str(tmp_path)]) + + def raise_permission_error(*args, **kwargs): + raise OSError(errno.EACCES, "permission denied") + + monkeypatch.setattr( + "semantic_code_intelligence.cli.commands.init_cmd.load_config", + raise_permission_error, + ) + + result = runner.invoke(cli, ["init", str(tmp_path), "--interactive"]) + + assert result.exit_code != 0 + assert "failed to read existing .codexa/config.json" in result.output.lower() + class TestIndexCommand: """Tests for the index command."""