From da1112809b7ae4b2b3a3ec03c0cc0398974ccea9 Mon Sep 17 00:00:00 2001 From: Dmitry Orlov Date: Tue, 12 May 2026 18:53:21 +0200 Subject: [PATCH 1/9] operations and auto-completions in shell --- contree_cli/arguments.py | 10 + contree_cli/cli/images.py | 3 +- contree_cli/cli/operation.py | 157 +++++++++ contree_cli/cli/ps.py | 42 ++- contree_cli/cli/run.py | 32 +- contree_cli/mapped_file.py | 12 +- contree_cli/session.py | 41 ++- contree_cli/shell/argmap.py | 69 ++++ contree_cli/shell/argspec.py | 343 +++++++++++++++++++ contree_cli/shell/cache.py | 109 ++++++ contree_cli/shell/completer.py | 600 +++++++++++++-------------------- contree_cli/shell/repl.py | 146 +++++++- contree_cli/shell/sources.py | 491 +++++++++++++++++++++++++++ docs/commands/index.md | 1 + docs/commands/kill.md | 1 + docs/commands/operation.md | 123 +++++++ docs/commands/ps.md | 7 + docs/commands/shell.md | 63 +++- docs/commands/show.md | 1 + docs/tutorial/shell.md | 45 ++- tests/conftest.py | 1 + tests/test_argmap.py | 150 +++++++++ tests/test_operation.py | 307 +++++++++++++++++ tests/test_ps.py | 70 ++++ tests/test_run.py | 86 ++++- tests/test_session.py | 20 ++ tests/test_shell_completer.py | 281 ++++++++++++++- tests/test_shell_repl.py | 165 +++++++-- 28 files changed, 2954 insertions(+), 422 deletions(-) create mode 100644 contree_cli/cli/operation.py create mode 100644 contree_cli/shell/argmap.py create mode 100644 contree_cli/shell/argspec.py create mode 100644 contree_cli/shell/cache.py create mode 100644 contree_cli/shell/sources.py create mode 100644 docs/commands/operation.md create mode 100644 tests/test_argmap.py create mode 100644 tests/test_operation.py diff --git a/contree_cli/arguments.py b/contree_cli/arguments.py index 1111aea..98d5773 100644 --- a/contree_cli/arguments.py +++ b/contree_cli/arguments.py @@ -14,6 +14,7 @@ images, kill, ls, + operation, ps, run, session, @@ -44,6 +45,9 @@ contree run --file ./src:/app/src -- make -C /app/src contree images --prefix=ubuntu 
contree ps -q + contree op ls same as `contree ps` + contree op show UUID1 UUID2 multi-UUID show + contree op cancel UUID1 UUID2 multi-UUID cancel (or --all) contree show OPERATION_UUID contree tag IMAGE_UUID latest contree ls /etc list files in session image @@ -211,6 +215,12 @@ def register( register("ps", "List operations/instances", ps.setup_parser) register("kill", "Cancel an operation", kill.setup_parser) register("show", "Show operation result", show.setup_parser) +register( + "operation", + "Manage operations (list/show/cancel)", + operation.setup_parser, + aliases=["op"], +) register("ls", "List files in image", ls.setup_parser) register("cat", "Show file content from image", cat.setup_parser) register("cp", "Copy file from image to local path", cp.setup_parser) diff --git a/contree_cli/cli/images.py b/contree_cli/cli/images.py index 6a652c3..6bb4947 100644 --- a/contree_cli/cli/images.py +++ b/contree_cli/cli/images.py @@ -202,7 +202,8 @@ def _add_list_args(p: argparse.ArgumentParser) -> None: def setup_parser(p: argparse.ArgumentParser) -> SetupResult: - # Parent-level args for backward compat (contree images --prefix …) + # Parent-level list args mirror the subcommand so `contree images + # --prefix …` works without typing `list`. _add_list_args(p) sub = p.add_subparsers(dest="images_action") diff --git a/contree_cli/cli/operation.py b/contree_cli/cli/operation.py new file mode 100644 index 0000000..8a4a9be --- /dev/null +++ b/contree_cli/cli/operation.py @@ -0,0 +1,157 @@ +"""Manage operations (list, inspect, cancel). + +Aggregates ps/show/kill under a single namespace, and adds multi-UUID +support to ``show`` and ``cancel`` so several operations can be acted +on in one invocation. + +Subcommands: + list (ls) List operations. Same flags as `contree ps`. + show UUID [UUID...] Show one or more operation results. + cancel UUID [UUID...] Cancel one or more operations (or --all). 
+""" + +from __future__ import annotations + +import argparse +import logging +from dataclasses import dataclass, field + +from contree_cli import CLIENT, ArgumentsProtocol, SetupResult +from contree_cli.cli import kill as kill_module +from contree_cli.cli import ps as ps_module +from contree_cli.cli.show import ShowArgs, cmd_show +from contree_cli.client import ApiError +from contree_cli.types import FLAGS + +logger = logging.getLogger(__name__) + +EPILOG = """\ +for coding agents: + list/show are read-only; cancel mutates remote state + show and cancel accept multiple UUIDs in one invocation + show supports @N session-history references inherited from `contree show` +""" + + +@dataclass(frozen=True) +class ShowMultiArgs(ArgumentsProtocol): + uuids: list[str] = field(default_factory=list) + + @classmethod + def from_args(cls, ns: argparse.Namespace) -> ShowMultiArgs: + return cls(uuids=list(ns.uuids)) + + +@dataclass(frozen=True) +class CancelArgs(ArgumentsProtocol): + uuids: list[str] = field(default_factory=list) + all: bool = False + + @classmethod + def from_args(cls, ns: argparse.Namespace) -> CancelArgs: + return cls(uuids=list(ns.uuids or []), all=ns.all) + + +def setup_parser(p: argparse.ArgumentParser) -> SetupResult: + sub = p.add_subparsers(dest="operation_action", required=True) + + list_p = sub.add_parser( + "list", + aliases=["ls"], + help="List operations", + description=("List operations. Accepts the same flags as `contree ps`."), + epilog="for coding agents: read-only command", + ) + list_handler, list_loader = ps_module.setup_parser(list_p) + list_p.set_defaults(handler=list_handler, load_args=list_loader) + + show_p = sub.add_parser( + "show", + help="Show one or more operation results", + description=( + "Fetch and display the result of each given operation. " + "Same per-UUID behaviour as `contree show`: terminal results " + "are cached; @N references resolve against session history." 
+ ), + epilog=( + "for coding agents:\n" + " read-only command\n" + " accepts multiple UUIDs; each rendered as its own row" + ), + ) + show_p.add_argument( + "uuids", + nargs="+", + metavar="UUID", + help="One or more operation UUIDs (or @N history references)", + ) + show_p.set_defaults(handler=cmd_show_multi, load_args=ShowMultiArgs) + + cancel_p = sub.add_parser( + "cancel", + help="Cancel one or more operations", + description=( + "Cancel each given operation. With --all, cancels every active " + "operation (PENDING, ASSIGNED, EXECUTING)." + ), + epilog=( + "for coding agents:\n" + " mutating command\n" + " pass UUIDs to cancel specific operations or --all for everything" + ), + ) + cancel_p.add_argument( + "uuids", + nargs="*", + metavar="UUID", + help="Operation UUIDs to cancel", + ) + cancel_p.add_argument( + *FLAGS["all"], + action="store_true", + help="Cancel every active operation", + ) + cancel_p.set_defaults(handler=cmd_cancel, load_args=CancelArgs) + + return cmd_show_multi, ShowMultiArgs + + +def cmd_show_multi(args: ShowMultiArgs) -> int | None: + exit_code = 0 + for uuid in args.uuids: + try: + result = cmd_show(ShowArgs(uuid=uuid)) + except ApiError as exc: + logger.error("Failed to fetch %s: %s", uuid, exc) + exit_code = max(exit_code, 1) + continue + if isinstance(result, int) and result: + exit_code = max(exit_code, result) + return exit_code or None + + +def cmd_cancel(args: CancelArgs) -> int | None: + client = CLIENT.get() + + if args.all: + if args.uuids: + logger.warning("--all overrides explicit UUIDs; cancelling all active") + uuids = kill_module._list_active(client) + if not uuids: + logger.info("No active operations to cancel") + return None + else: + if not args.uuids: + logger.error("Provide at least one UUID, or use --all") + return 1 + uuids = args.uuids + + failed = 0 + for uuid in uuids: + try: + client.delete(f"/v1/operations/{uuid}") + logger.info("Cancelled operation %s", uuid) + except ApiError as exc: + logger.error("Failed 
to cancel %s: %s", uuid, exc) + failed += 1 + return 1 if failed else None diff --git a/contree_cli/cli/ps.py b/contree_cli/cli/ps.py index b3908f5..90fbc67 100644 --- a/contree_cli/cli/ps.py +++ b/contree_cli/cli/ps.py @@ -119,21 +119,37 @@ def setup_parser(p: argparse.ArgumentParser) -> SetupResult: return cmd_ps, PsArgs +DATETIME_FIELDS = frozenset({"created_at", "started_at", "finished_at", "updated_at"}) + + +def transform_field(key: str, value: Any) -> Any: + """Light-touch typing for known fields, pass-through for everything else.""" + if value is None: + return "" if key == "error" else None + if key in DATETIME_FIELDS: + return parse_datetime(value) + if key == "duration": + return timedelta(seconds=value) + return value + + def emit_op(formatter: OutputFormatter, op: dict[str, Any], *, quiet: bool) -> None: - row = dict( - uuid=op["uuid"], - status=op["status"], - kind=op["kind"], - created_at=parse_datetime(op["created_at"]), - duration=timedelta(seconds=op["duration"]) - if op.get("duration") is not None - else None, - error=op.get("error") or "", - ) if quiet: - print(row["uuid"]) - else: - formatter(**row) + print(op["uuid"]) + return + # Take every scalar top-level field from the API response so new server + # fields show up automatically. Nested structures (metadata, result) are + # skipped to keep the table readable -- use `show UUID` for the detail + # view that drills into them. ``error`` is pinned to the last column + # because it can be a long free-form message and trailing it keeps the + # rest of the row aligned. 
+ row = { + key: transform_field(key, value) + for key, value in op.items() + if key != "error" and not isinstance(value, (dict, list)) + } + row["error"] = transform_field("error", op.get("error")) + formatter(**row) def cmd_ps(args: PsArgs) -> None: diff --git a/contree_cli/cli/run.py b/contree_cli/cli/run.py index b1ec395..deb0d81 100644 --- a/contree_cli/cli/run.py +++ b/contree_cli/cli/run.py @@ -441,9 +441,13 @@ def _build_payload( ) -> dict[str, object]: """Build the JSON payload for POST /v1/instances.""" if args.shell: - # In shell mode the API runs `sh -c `, so we must - # rebuild the original argv into a shell-safe expression. - command = shlex.join(args.command_args) + # API runs `sh -c `. A single arg is already a shell + # expression (the user pre-quoted it: `run -s -- 'a ; b'`), so + # passing it through verbatim preserves operators like `;`, `&&`, + # `|`. Multiple args are individual tokens that need joining with + # quoting to preserve argument boundaries. + parts = args.command_args + command = parts[0] if len(parts) == 1 else shlex.join(parts) else: # In non-shell mode the API exec's command + args directly, # JSON list elements preserve boundaries, no quoting needed. @@ -710,7 +714,21 @@ def _norm(item: object) -> dict[str, object]: # 6. 
Cache terminal operation result store.cache[(op_uuid, "operation")] = op - if op["status"] != "SUCCESS": + metadata = op.get("metadata") or {} + assert isinstance(metadata, dict) + instance_result = metadata.get("result") or {} + assert isinstance(instance_result, dict) + state = instance_result.get("state") or {} + assert isinstance(state, dict) + timed_out = bool(state.get("timed_out")) + + if timed_out: + logger.warning( + "Operation %s timed out after %ss", + op_uuid, + args.timeout if args.timeout is not None else "?", + ) + elif op["status"] != "SUCCESS": logger.fatal( "Operation %s ended with status %s%s", op_uuid, @@ -744,12 +762,6 @@ def _norm(item: object) -> dict[str, object]: title = " ".join(args.command_args) if args.command_args else "" store.create_disposable_branch(op_uuid, title) - metadata = op.get("metadata") or {} - assert isinstance(metadata, dict) - instance_result = metadata.get("result") or {} - assert isinstance(instance_result, dict) - state = instance_result.get("state") or {} - assert isinstance(state, dict) exit_code = state.get("exit_code") if isinstance(exit_code, int): return exit_code diff --git a/contree_cli/mapped_file.py b/contree_cli/mapped_file.py index 6fdaacd..8195891 100644 --- a/contree_cli/mapped_file.py +++ b/contree_cli/mapped_file.py @@ -29,7 +29,7 @@ def sha256(self) -> str: @classmethod def parse(cls, spec: str) -> "MappedFile": drive = pathlib.PurePath(spec).drive - parts = spec[len(drive) :].split(":") + parts = split_mapped_value(spec[len(drive) :]) if not parts or not (drive + parts[0]): raise ValueError(f"invalid file spec {spec!r}: host_path is required") @@ -166,3 +166,13 @@ def _parse_mode(value: str) -> int: return int(value, 8) except ValueError: return 0 + + +def split_mapped_value(text: str) -> list[str]: + """Split a ``--file`` value on ``:`` without validation or stat. 
+ + Used by tab completion to figure out which segment is being typed + (host path / instance path / u/g/m tag) without raising on partial + or syntactically odd input. + """ + return text.split(":") diff --git a/contree_cli/session.py b/contree_cli/session.py index f98c6c2..ba7cbcf 100644 --- a/contree_cli/session.py +++ b/contree_cli/session.py @@ -14,6 +14,7 @@ from pathlib import Path, PurePosixPath CONTREE_CONCURRENCY = int(os.getenv("CONTREE_CONCURRENCY", "8")) +CONTREE_DB_TIMEOUT = float(os.getenv("CONTREE_DB_TIMEOUT", "30")) @dataclass(frozen=True) @@ -161,7 +162,7 @@ def _decode(blob: str) -> object: return json.loads( gzip.decompress(base64.b64decode(blob[5:])), ) - # Legacy: no prefix — plain JSON (written before this change). + # No prefix: plain JSON payload. return json.loads(blob) def __getitem__(self, key: CacheKey) -> object: @@ -216,6 +217,35 @@ def __len__(self) -> int: assert row is not None return row[0] # type: ignore[no-any-return] + def invalidate_prefix( + self, + *, + image_prefix: str | None = None, + kind_prefix: str | None = None, + ) -> int: + """Drop cache entries by image_uuid prefix and/or kind prefix. + + Returns the number of rows removed. ``image_prefix`` and + ``kind_prefix`` may be combined; both default to "match anything" + when omitted (caller must pass at least one). 
+ """ + if image_prefix is None and kind_prefix is None: + raise ValueError("invalidate_prefix needs image_prefix or kind_prefix") + clauses: list[str] = [] + params: list[object] = [] + if image_prefix is not None: + clauses.append("image_uuid LIKE ?") + params.append(image_prefix + "%") + if kind_prefix is not None: + clauses.append("kind LIKE ?") + params.append(kind_prefix + "%") + cur = self._conn.execute( + "DELETE FROM image_cache WHERE " + " AND ".join(clauses), + tuple(params), + ) + self._conn.commit() + return cur.rowcount + class SessionStore: MAX_SHELL_HISTORY = 10_000 @@ -223,10 +253,15 @@ class SessionStore: def __init__(self, db_path: Path, session_key: str) -> None: self._session_key = session_key db_path.parent.mkdir(parents=True, exist_ok=True) - self._conn = sqlite3.connect(str(db_path), timeout=5.0) + self._conn = sqlite3.connect(str(db_path), timeout=CONTREE_DB_TIMEOUT) self._conn.row_factory = sqlite3.Row + # WAL: concurrent readers + one writer; safe across processes. + # synchronous=NORMAL: faster commits in WAL, still durable on + # crash; shortens write-lock hold time, reducing SQLITE_BUSY + # between two contree shells sharing the per-profile DB. self._conn.execute("PRAGMA journal_mode=WAL") - self._conn.execute("PRAGMA busy_timeout=5000") + self._conn.execute("PRAGMA synchronous=NORMAL") + self._conn.execute(f"PRAGMA busy_timeout={int(CONTREE_DB_TIMEOUT * 1000)}") self._conn.executescript(SCHEMA) @cached_property diff --git a/contree_cli/shell/argmap.py b/contree_cli/shell/argmap.py new file mode 100644 index 0000000..5c9b5cb --- /dev/null +++ b/contree_cli/shell/argmap.py @@ -0,0 +1,69 @@ +"""Central registry mapping argparse actions to completion sources. + +Keys are ``(command_path, dest)`` pairs where ``command_path`` is the +tuple of canonical subcommand names visited during the parser walk +(aliases such as ``op`` for ``operation`` or ``ls`` for ``list`` are +normalised via :func:`contree_cli.shell.argspec.canonical_name`). 
Empty +tuple is the root parser. The source name on the right must exist in +``SOURCES`` (see ``sources.py``). + +Adding a new completable argument is a one-line entry below. +""" + +from __future__ import annotations + +ArgKey = tuple[tuple[str, ...], str] + + +# Mapping of (command_path, action.dest) to a registered source name. +# Empty tuple is the root parser (top-level flags like --profile). +ARG_SOURCES: dict[ArgKey, str] = { + # Top-level flags on the root parser. + ((), "profile"): "profile", + # use / tag -- image references. + (("use",), "image"): "image", + (("tag",), "args"): "image", + # show / kill / wait -- operation UUIDs. + (("show",), "uuid"): "operation", + (("kill",), "uuid"): "operation", + (("session", "wait"), "op_ids"): "operation", + (("operation", "show"), "uuids"): "operation", + (("operation", "cancel"), "uuids"): "operation", + # Session keys. + (("session", "use"), "session_name"): "session", + (("session", "show"), "session_name"): "session", + (("session", "delete"), "keys"): "session", + # Branch names. + (("session", "branch"), "branch_name"): "branch", + (("session", "branch"), "from_branch"): "branch", + (("session", "checkout"), "checkout_branch"): "branch", + # Profile names (auth subtree). + (("auth",), "profile"): "profile", + (("auth", "switch"), "profile_name"): "profile", + (("auth", "remove"), "profile_name"): "profile", + # Sandbox filesystem. + (("ls",), "path"): "sandbox-path", + (("cat",), "path"): "sandbox-path", + (("cp",), "path"): "sandbox-path", + (("cp",), "dest"): "host-path", + (("cd",), "path"): "sandbox-dir", + (("file", "edit"), "path"): "sandbox-path", + (("file", "edit"), "editor"): "editor", + (("file", "cp"), "src"): "host-path", + (("file", "cp"), "dest"): "sandbox-path", + # run -- exec inside the sandbox. + (("run",), "command_args"): "sandbox-path", + (("run",), "cwd"): "sandbox-dir", + (("run",), "file"): "mapped-file", + (("run",), "use"): "image", + # env / skill. 
+ (("env",), "vars"): "env-key", + (("skill", "install"), "specs"): "skill-spec", + (("skill", "remove"), "specs"): "skill-spec", + (("skill", "upgrade"), "specs"): "skill-spec", +} + + +def lookup(command_path: tuple[str, ...], dest: str) -> str | None: + """Return the source name registered for ``(command_path, dest)``.""" + return ARG_SOURCES.get((command_path, dest)) diff --git a/contree_cli/shell/argspec.py b/contree_cli/shell/argspec.py new file mode 100644 index 0000000..519904a --- /dev/null +++ b/contree_cli/shell/argspec.py @@ -0,0 +1,343 @@ +"""Argparse introspection helpers for the dynamic shell completer. + +Walks a live ``argparse.ArgumentParser`` tree to decide what the user is +typing (subcommand, flag name, flag value, positional) and records the +canonical subcommand names visited so :mod:`contree_cli.shell.argmap` +can resolve a completion source by ``(command_path, dest)``. +""" + +from __future__ import annotations + +import argparse +from dataclasses import dataclass, field + +# --------------------------------------------------------------------------- +# Walking the parser tree +# --------------------------------------------------------------------------- + + +@dataclass(frozen=True) +class WalkResult: + parser: argparse.ArgumentParser + consumed: int + seen_double_dash: bool + in_remainder: bool + remainder_action: argparse.Action | None + flags_seen: tuple[str, ...] = field(default_factory=tuple) + command_path: tuple[str, ...] = field(default_factory=tuple) + + +def canonical_name(parser: argparse.ArgumentParser) -> str: + """Return the canonical subcommand name a parser was registered under. + + ``argparse.add_parser("foo", aliases=["f"])`` builds one parser whose + ``prog`` ends with the canonical ``foo``; aliases share the same parser + object but never appear in ``prog``. Extracting the last whitespace + separated token therefore normalises alias dispatch to a single name. 
+ """ + return parser.prog.rsplit(None, 1)[-1] + + +def walk( + root: argparse.ArgumentParser, + tokens: list[str], +) -> WalkResult: + """Descend *root* through subparsers based on *tokens*. + + Stops at the first token that does not match a subparser choice. + ``--`` and ``argparse.REMAINDER`` short-circuit the walk so later + helpers can offer the right completion (sandbox path or nothing). + ``command_path`` collects canonical subcommand names visited; the + completer uses it as the lookup key in :mod:`argmap`. + """ + parser = root + consumed = 0 + seen_double_dash = False + in_remainder = False + remainder_action: argparse.Action | None = None + flags_seen: list[str] = [] + command_path: list[str] = [] + + while consumed < len(tokens): + tok = tokens[consumed] + + if tok == "--": + seen_double_dash = True + consumed += 1 + continue + + if tok.startswith("-"): + flags_seen.append(tok.split("=", 1)[0]) + consumed += 1 + continue + + sub_action = find_subparsers(parser) + if sub_action is not None and tok in sub_action.choices: + chosen = sub_action.choices[tok] + assert isinstance(chosen, argparse.ArgumentParser) + parser = chosen + command_path.append(canonical_name(chosen)) + consumed += 1 + continue + + rem = find_remainder(parser) + if rem is not None: + in_remainder = True + remainder_action = rem + break + + consumed += 1 + + return WalkResult( + parser=parser, + consumed=consumed, + seen_double_dash=seen_double_dash, + in_remainder=in_remainder, + remainder_action=remainder_action, + flags_seen=tuple(flags_seen), + command_path=tuple(command_path), + ) + + +def find_subparsers( + parser: argparse.ArgumentParser, +) -> argparse._SubParsersAction[argparse.ArgumentParser] | None: + for action in parser._actions: + if isinstance(action, argparse._SubParsersAction): + return action + return None + + +def find_remainder(parser: argparse.ArgumentParser) -> argparse.Action | None: + for action in parser._actions: + if action.nargs == argparse.REMAINDER: + return 
action + return None + + +# --------------------------------------------------------------------------- +# Deciding what to complete next +# --------------------------------------------------------------------------- + + +@dataclass(frozen=True) +class Subcommand: + action: argparse._SubParsersAction[argparse.ArgumentParser] + + +@dataclass(frozen=True) +class FlagName: + parser: argparse.ArgumentParser + + +@dataclass(frozen=True) +class FlagValue: + action: argparse.Action + value_text: str + + +@dataclass(frozen=True) +class Positional: + action: argparse.Action + sticky: bool + + +@dataclass(frozen=True) +class End: + pass + + +Target = Subcommand | FlagName | FlagValue | Positional | End + + +# Action classes whose presence means "no value to complete after the flag". +NO_VALUE_ACTION_NAMES: frozenset[str] = frozenset( + { + "_StoreTrueAction", + "_StoreFalseAction", + "_CountAction", + "_StoreConstAction", + "_AppendConstAction", + "_HelpAction", + "_VersionAction", + } +) + +# Flag tokens that terminate parsing. 
+TERMINATING_FLAGS: frozenset[str] = frozenset({"-h", "--help"}) + + +def action_takes_value(action: argparse.Action) -> bool: + """Return True when *action* expects a value after its flag.""" + if action.nargs == 0: + return False + return type(action).__name__ not in NO_VALUE_ACTION_NAMES + + +def find_option_action( + parser: argparse.ArgumentParser, + flag: str, +) -> argparse.Action | None: + """Find the action registered for an option string, or ``None``.""" + for action in parser._actions: + if flag in action.option_strings: + return action + return None + + +def positional_actions( + parser: argparse.ArgumentParser, +) -> list[argparse.Action]: + """Return non-flag, non-help, non-subparsers actions in declaration order.""" + out: list[argparse.Action] = [] + for action in parser._actions: + if action.option_strings: + continue + if isinstance(action, argparse._SubParsersAction): + continue + out.append(action) + return out + + +def positional_min_consumes(action: argparse.Action) -> int: + """Minimum number of tokens an unfilled positional consumes.""" + nargs = action.nargs + if nargs is None: + return 1 + if isinstance(nargs, int): + return nargs + if nargs == "?": + return 0 + if nargs == "*": + return 0 + if nargs == "+": + return 1 + if nargs == argparse.REMAINDER: + return 0 + return 1 + + +def positional_is_sticky(action: argparse.Action) -> bool: + return action.nargs in {"*", "+", argparse.REMAINDER} + + +def count_positional_tokens( + tokens: list[str], + consumed: int, +) -> int: + """Count non-flag tokens after *consumed* in *tokens* (excludes trailing edit).""" + count = 0 + seen_dd = False + i = consumed + while i < len(tokens) - 1: + tok = tokens[i] + if tok == "--": + seen_dd = True + i += 1 + continue + if not seen_dd and tok.startswith("-"): + i += 1 + continue + count += 1 + i += 1 + return count + + +def split_equals_flag(text: str) -> tuple[str, str] | None: + """Split ``--flag=value`` into ``("--flag", "value")``, else ``None``.""" + 
if not text.startswith("-"): + return None + if "=" not in text: + return None + name, value = text.split("=", 1) + return name, value + + +def next_target( + walk_result: WalkResult, + tokens: list[str], + text: str, +) -> Target: + """Decide what the user is currently completing. + + *tokens* is the full prefix list excluding the in-progress *text*. The + walker already consumed the subparser portion; the prefix from + ``walk_result.consumed`` onwards belongs to the active parser. + """ + parser = walk_result.parser + + # Inside REMAINDER everything is part of the sticky positional. + if walk_result.in_remainder and walk_result.remainder_action is not None: + return Positional(walk_result.remainder_action, sticky=True) + + # Inline =-form flag, e.g. user typed "--use=tag:ub". + eq = split_equals_flag(text) + if eq is not None: + name, value_text = eq + action = find_option_action(parser, name) + if action is None: + return End() + if not action_takes_value(action): + return End() + return FlagValue(action, value_text) + + # User is typing a flag name. + if text.startswith("-") and not walk_result.seen_double_dash: + return FlagName(parser) + + # Previous token was a flag terminator -> nothing to complete. + prev = tokens[-1] if tokens else "" + if prev in TERMINATING_FLAGS: + return End() + + # Previous token was a flag expecting a value -> complete that value. + if prev.startswith("-") and not walk_result.seen_double_dash: + action = find_option_action(parser, prev) + if action is not None and action_takes_value(action): + return FlagValue(action, text) + + # Subcommand step: at the active parser, the next non-flag positional + # might be a subparser choice. + sub_action = find_subparsers(parser) + if sub_action is not None: + # If we are still at the subparser slot (none of its choices was + # consumed), offer subcommand names. 
+ slot_filled = any( + tok in sub_action.choices + for tok in tokens[walk_result.consumed : -1] + if not tok.startswith("-") and tok != "--" + ) + if not slot_filled: + return Subcommand(sub_action) + + # Positional slot: pick the first unfilled positional action. + positional_target = pick_positional(parser, tokens, walk_result.consumed) + if positional_target is not None: + return positional_target + + return End() + + +def pick_positional( + parser: argparse.ArgumentParser, + tokens: list[str], + consumed: int, +) -> Positional | None: + """Return the next unfilled positional action, if any.""" + actions = positional_actions(parser) + if not actions: + return None + filled = count_positional_tokens(tokens, consumed) + cursor = 0 + for action in actions: + if positional_is_sticky(action): + return Positional(action, sticky=True) + slot = positional_min_consumes(action) + # nargs=None means exactly one + slot = max(slot, 1) if action.nargs in (None, "?") and slot == 0 else slot + if action.nargs == "?": + slot = 1 # treat as one-slot for filling order + if filled - cursor < slot: + return Positional(action, sticky=False) + cursor += slot + return None diff --git a/contree_cli/shell/cache.py b/contree_cli/shell/cache.py new file mode 100644 index 0000000..056bf5d --- /dev/null +++ b/contree_cli/shell/cache.py @@ -0,0 +1,109 @@ +"""TTL-aware persistent cache for completion sources. + +Wraps :class:`contree_cli.session.ImageCache` (sqlite-backed +``MutableMapping[(image_uuid, kind), object]``) to add a fetched-at +timestamp and per-source TTL semantics. Keys are namespaced by the active +profile name so a stale image list from one profile is never returned to +another after ``contree auth switch``. 
+""" + +from __future__ import annotations + +import contextlib +import time +from dataclasses import dataclass + +from contree_cli.session import ImageCache + + +@dataclass(frozen=True) +class CacheEntry: + value: object + fetched_at: float + + def is_fresh(self, ttl: float, now: float | None = None) -> bool: + if ttl <= 0: + return True + current = now if now is not None else time.time() + return current - self.fetched_at < ttl + + +class SourceCache: + """Adapter over :class:`ImageCache` that adds TTL bookkeeping.""" + + __slots__ = ("backend", "profile") + + def __init__(self, backend: ImageCache, profile: str) -> None: + self.backend = backend + self.profile = profile + + def namespaced(self, scope: str, kind: str) -> tuple[str, str]: + """Build a profile-scoped key.""" + if scope: + return (f"profile:{self.profile}:{scope}", kind) + return (f"profile:{self.profile}", kind) + + def get(self, scope: str, kind: str, ttl: float) -> object | None: + key = self.namespaced(scope, kind) + try: + raw = self.backend[key] + except KeyError: + return None + if not isinstance(raw, dict): + return None + if "value" not in raw or "fetched_at" not in raw: + return None + entry = CacheEntry( + value=raw["value"], + fetched_at=float(raw["fetched_at"]), + ) + if not entry.is_fresh(ttl): + return None + return entry.value + + def set(self, scope: str, kind: str, value: object) -> None: + key = self.namespaced(scope, kind) + self.backend[key] = {"value": value, "fetched_at": time.time()} + + def invalidate(self, scope: str, kind: str) -> None: + key = self.namespaced(scope, kind) + with contextlib.suppress(KeyError): + del self.backend[key] + + def invalidate_kind_prefix(self, kind_prefix: str) -> None: + """Drop every cache entry whose kind starts with *kind_prefix*.""" + scope_prefix = f"profile:{self.profile}" + to_drop = [ + (image_uuid, kind) + for image_uuid, kind in list(self.backend) + if image_uuid.startswith(scope_prefix) and kind.startswith(kind_prefix) + ] + for key 
in to_drop: + with contextlib.suppress(KeyError): + del self.backend[key] + + def invalidate_scope(self, scope: str) -> None: + """Drop every cache entry under the given scope (e.g. an image uuid).""" + prefix = ( + f"profile:{self.profile}:{scope}" if scope else f"profile:{self.profile}" + ) + to_drop = [ + (image_uuid, kind) + for image_uuid, kind in list(self.backend) + if image_uuid == prefix or image_uuid.startswith(prefix + ":") + ] + for key in to_drop: + with contextlib.suppress(KeyError): + del self.backend[key] + + def invalidate_all(self) -> None: + """Drop every entry under the active profile.""" + prefix = f"profile:{self.profile}" + to_drop = [ + (image_uuid, kind) + for image_uuid, kind in list(self.backend) + if image_uuid == prefix or image_uuid.startswith(prefix + ":") + ] + for key in to_drop: + with contextlib.suppress(KeyError): + del self.backend[key] diff --git a/contree_cli/shell/completer.py b/contree_cli/shell/completer.py index eea6492..887480d 100644 --- a/contree_cli/shell/completer.py +++ b/contree_cli/shell/completer.py @@ -1,15 +1,44 @@ -"""Tab completion for the interactive shell.""" +"""Tab completion for the interactive shell. + +Argparse-driven: walks the live ``ShellArgumentParser`` tree returned +by :func:`build_shell_parser` to decide what the user is typing +(subcommand, flag name, flag value, positional). Per-action completion +sources live in :mod:`contree_cli.shell.argmap`, keyed by +``(command_path, dest)``. + +The trie handles only bare-token shell builtins (``cd``, ``pwd``, +``vim``, ``ls``, ``cat``, ``--format``, ``-f``, ...) since +``repl.execute`` intercepts them before argparse sees them. + +Public entry points: :class:`ShellCompleter` with +``.complete(text, state)`` (readline hook) and +``.compute_completions(text, line, begidx)``. 
+""" from __future__ import annotations import argparse -import json import logging import shlex from typing import TYPE_CHECKING -from contree_cli.output import FORMATTERS -from contree_cli.shell.parser import CommandInfo, ShellArgumentParser, get_command_names +from contree_cli.shell import argspec +from contree_cli.shell.argmap import lookup as argmap_lookup +from contree_cli.shell.cache import SourceCache +from contree_cli.shell.parser import ( + CommandInfo, + ShellArgumentParser, + build_shell_parser, + get_command_names, +) +from contree_cli.shell.sources import ( + SOURCES, + CompletionContext, + complete_choices, + complete_command_name, + complete_sandbox_dir, + complete_sandbox_path, +) from contree_cli.shell.trie import Handler, PrefixRouter if TYPE_CHECKING: @@ -19,8 +48,27 @@ log = logging.getLogger(__name__) +BUILTIN_BARE_COMMANDS: tuple[str, ...] = ( + "cd", + "pwd", + "history", + "help", + "clear", + "exit", + "quit", + "vim", + "vi", + "nvim", + "nano", + "ls", + "cat", + "--format", + "-f", +) + + class ShellCompleter: - """Readline completer with PrefixRouter-based dispatch.""" + """Readline completer with argparse-driven dispatch.""" def __init__( self, @@ -29,74 +77,23 @@ def __init__( store: SessionStore | None = None, root_parser: ShellArgumentParser | None = None, ) -> None: - self._commands = commands - self._command_names = get_command_names() - self._client = client - self._store = store - self._root_parser = root_parser - self._matches: list[str] = [] + self.commands = commands + self.command_names = get_command_names() + self.client = client + self.store = store + self.matches: list[str] = [] - self.router: PrefixRouter = PrefixRouter() - self._build_router() + if root_parser is None: + root_parser, _ = build_shell_parser() + self.root_parser = root_parser - def _build_router(self) -> None: - r = self.router + self.router: PrefixRouter = PrefixRouter() + self.build_router() - # Shell builtins with no argument completion - for name in 
("exit", "quit", "pwd", "history", "clear"): - r[(name,)] = self._complete_noop - - r[("help",)] = self._complete_help_names - r[("cd",)] = self._complete_dir_only - - # Editors → sandbox path - for name in ("vim", "vi", "nano"): - r[(name,)] = self._complete_sandbox_path - - # Bare aliases → sandbox path (same as contree ls/cat) - r[("ls",)] = self._complete_sandbox_path - r[("cat",)] = self._complete_sandbox_path - - # contree subcommand argument completers - arg_map: dict[str, Handler] = { - "ls": self._complete_sandbox_path, - "cat": self._complete_sandbox_path, - "cp": self._complete_sandbox_path, - "cd": self._complete_dir_only, - "use": self._complete_image, - "tag": self._complete_image, - "show": self._complete_operation, - "kill": self._complete_operation, - } - - # Register all contree commands (names + aliases) - for name in self._command_names: - handler = arg_map.get(name, self._complete_noop) - r[("contree", name)] = handler - - # Register subparser children so subcommand name completion works. - # E.g. "contree file " should show "edit", "cp", etc. 
- for name, info in self._commands.items(): - for sub_name in self._get_subcommand_names(info.parser): - key = ("contree", name, sub_name) - if key not in r: - r[key] = self._complete_noop - - # --format / -f — complete format names - r[("--format",)] = self._complete_format_name - r[("-f",)] = self._complete_format_name - - # Nested subcommands with specific completers - r[("contree", "session", "use")] = self._complete_session_name - r[("contree", "session", "checkout")] = self._complete_branch - r[("contree", "session", "co")] = self._complete_branch - r[("contree", "session", "branch")] = self._complete_branch - r[("contree", "session", "br")] = self._complete_branch - r[("contree", "file", "edit")] = self._complete_sandbox_path - r[("contree", "file", "e")] = self._complete_sandbox_path + # -- public API used by readline and tests ---------------------------- def complete(self, text: str, state: int) -> str | None: - """Readline completer function (called repeatedly with state=0,1,...).""" + """Readline completer hook (called repeatedly with state=0,1,...).""" if state == 0: try: import readline @@ -106,9 +103,9 @@ def complete(self, text: str, state: int) -> str | None: except (ImportError, AttributeError): line = text begidx = 0 - self._matches = self.compute_completions(text, line, begidx) - if state < len(self._matches): - return self._matches[state] + self.matches = self.compute_completions(text, line, begidx) + if state < len(self.matches): + return self.matches[state] return None def compute_completions( @@ -117,134 +114,125 @@ def compute_completions( line: str, begidx: int, ) -> list[str]: - """Determine context and return matching completions.""" + """Return matches for *text* given the full *line* and cursor index.""" before_cursor = line[:begidx] try: tokens = shlex.split(before_cursor) except ValueError: return [] - # No tokens yet → root child names + ctx = self.context() + + # First token completion: nothing typed yet, or a single partial. 
if not tokens: return [n + " " for n in self.router.children if n.startswith(text)] - node, depth = self.router.resolve(tuple(tokens)) - remaining = tuple(tokens[depth:]) + # Bare-token shell builtins go through the trie. + first = tokens[0] + if first in BUILTIN_BARE_COMMANDS: + return self.dispatch_trie(tokens, text, ctx) - # Flag-value completion: -f/--format → format names - if tokens and tokens[-1] in ("-f", "--format"): - return self._complete_format_name((), text) + # Everything else is argparse-driven: with or without the literal + # "contree" prefix. + argparse_tokens = tokens[1:] if first == "contree" else tokens + return self.complete_argparse(argparse_tokens, text, ctx) - # Flags: look up the parser from commands dict - if text.startswith("-"): - parser = self._find_parser(tokens) - if parser is not None: - return self._complete_flags(parser, text) + # -- trie path -------------------------------------------------------- - # Children → subcommand name completion - if node.children: - matches = [n + " " for n in node.children if n.startswith(text)] - if matches: - return matches + def build_router(self) -> None: + r = self.router - # Handler with remaining tokens - if node.value is not None: - return node.value(remaining, text) + for name in ("exit", "quit", "pwd", "clear"): + r[(name,)] = handler_noop - # Fallback: implicit run mode — path-like text gets path completion, - # anything else gets root command names as suggestions. 
- if self._looks_like_path(text): - return self._complete_sandbox_path((), text) + r[("history",)] = handler_noop - return [n + " " for n in self.router.children if n.startswith(text)] + r[("help",)] = self.handler_help + r[("cd",)] = self.handler_sandbox_dir - # ------------------------------------------------------------------ - # Parser lookup for flag completion - # ------------------------------------------------------------------ + for name in ("vim", "vi", "nvim", "nano"): + r[(name,)] = self.handler_sandbox_path - def _find_parser( + r[("ls",)] = self.handler_sandbox_path + r[("cat",)] = self.handler_sandbox_path + + # Format flag belongs to the trie so "--format " works as a + # standalone shell builtin (intercepted in repl.execute). + r[("--format",)] = self.handler_format + r[("-f",)] = self.handler_format + + # Register every contree command name (and aliases) as router roots + # so first-token completion still lists them. Their values are + # resolved by the argparse path, so the handler is a noop here. + for name in self.command_names: + r[("contree", name)] = handler_noop + + def dispatch_trie( self, tokens: list[str], - ) -> argparse.ArgumentParser | None: - """Find the argparse parser for the current command context.""" - # Strip "contree" prefix if present - cmd_tokens = tokens[1:] if tokens and tokens[0] == "contree" else tokens - if not cmd_tokens: - return None - cmd_name = cmd_tokens[0] - if cmd_name not in self._commands: - return None - return self._commands[cmd_name].parser - - # ------------------------------------------------------------------ - # Completion handlers (remaining, text) -> list[str] - # ------------------------------------------------------------------ - - def _complete_noop( - self, - remaining: tuple[str, ...], text: str, + ctx: CompletionContext, ) -> list[str]: + """Resolve a builtin via the trie.""" + node, depth = self.router.resolve(tuple(tokens)) + # Format flag value: "--format " or "-f ". 
+ if tokens and tokens[-1] in ("--format", "-f"): + return self.handler_format((), text, ctx) + if node.value is not None: + remaining = tuple(tokens[depth:]) + return node.value(remaining, text, ctx) return [] - def _complete_help_names( - self, - remaining: tuple[str, ...], - text: str, - ) -> list[str]: - """``help `` — all known command / alias / shell names.""" - all_names = sorted({*self._command_names, *self.router.children}) - return [n + " " for n in all_names if n.startswith(text)] + # -- argparse path ---------------------------------------------------- - def _complete_format_name( + def complete_argparse( self, - remaining: tuple[str, ...], + tokens: list[str], text: str, + ctx: CompletionContext, ) -> list[str]: - """Complete output format names (``--format json``, ``-f table``).""" - return [n + " " for n in sorted(FORMATTERS) if n.startswith(text)] + walk_result = argspec.walk(self.root_parser, tokens) + target = argspec.next_target(walk_result, tokens, text) + + if isinstance(target, argspec.End): + # Implicit-run fallback: bare command not recognised by argparse, + # path-like text gets sandbox-path completion. 
+ if self.looks_like_path(text): + return complete_sandbox_path(text, ctx) + if not tokens: + return [n + " " for n in self.router.children if n.startswith(text)] + return [] - def _complete_dir_only( - self, - remaining: tuple[str, ...], - text: str, - ) -> list[str]: - """Complete sandbox directory paths (no files).""" - return self._complete_sandbox_path_inner(text, dirs_only=True) + if isinstance(target, argspec.Subcommand): + choices = list(target.action.choices.keys()) + return [n + " " for n in choices if n.startswith(text)] - def _complete_sandbox_path( - self, - remaining: tuple[str, ...], - text: str, - ) -> list[str]: - """Complete a sandbox file/directory path.""" - return self._complete_sandbox_path_inner(text) + if isinstance(target, argspec.FlagName): + return self.list_flag_names(target.parser, text) - @staticmethod - def _looks_like_path(text: str) -> bool: - """Return True when *text* looks like a filesystem path.""" - return "/" in text or text.startswith(".") or text.startswith("~") + if isinstance(target, argspec.FlagValue): + return self.complete_action_value( + target.action, + target.value_text, + ctx, + walk_result.command_path, + ) - # ------------------------------------------------------------------ - # Low-level helpers - # ------------------------------------------------------------------ + if isinstance(target, argspec.Positional): + return self.complete_action_value( + target.action, + text, + ctx, + walk_result.command_path, + ) - @staticmethod - def _get_subcommand_names( - parser: argparse.ArgumentParser, - ) -> list[str]: - """Extract subcommand names from a parser (if it has subparsers).""" - for action in parser._actions: - if isinstance(action, argparse._SubParsersAction): - return list(action.choices.keys()) return [] - def _complete_flags( + def list_flag_names( self, parser: argparse.ArgumentParser, text: str, ) -> list[str]: - """Complete flag names from parser actions.""" flags: list[str] = [] for action in 
parser._actions: for opt in action.option_strings: @@ -252,234 +240,132 @@ def _complete_flags( flags.append(opt + " ") return flags - def _complete_sandbox_path_inner( + def complete_action_value( self, + action: argparse.Action, text: str, - *, - dirs_only: bool = False, + ctx: CompletionContext, + command_path: tuple[str, ...], ) -> list[str]: - """Complete a sandbox file path via the inspect API.""" - if self._client is None or self._store is None: - return [] - - try: - session = self._store.session - if session is None: - return [] - image_uuid = session.current_image - except (SystemExit, Exception): - return [] + source_name = argmap_lookup(command_path, action.dest) + if source_name is not None: + fn = SOURCES.get(source_name) + if fn is not None: + return fn(text, ctx) + if action.choices is not None: + return complete_choices(action.choices, text) + return [] - # Split into directory + prefix for the API query. - # resolve_path handles cwd joining and .. normalisation. - if "/" in text: - last_slash = text.rindex("/") - user_dir = text[: last_slash + 1] or "/" - prefix = text[last_slash + 1 :] - resolved = self._store.resolve_path(user_dir) - api_dir = resolved if resolved == "/" else resolved + "/" - else: - user_dir = "" - prefix = text - resolved = self._store.resolve_path("") - api_dir = resolved if resolved == "/" else resolved + "/" - - entries = self._list_dir(image_uuid, api_dir) - if entries is None: - return [] + # -- handlers used by trie ------------------------------------------- - results: list[str] = [] - for entry in entries: - path = entry.get("path", "") - if not isinstance(path, str) or not path: - continue - is_dir = bool(entry.get("is_dir")) - if dirs_only and not is_dir: - continue - # API returns full paths like "/etc/hosts" — extract basename - name = path.rsplit("/", 1)[-1] - if not name.startswith(prefix): - continue - # Return the path as the user typed it (relative or absolute) - full = user_dir + name - if is_dir: - full += 
"/" - else: - full += " " - results.append(full) - return results - - def _complete_image( + def handler_help( self, remaining: tuple[str, ...], text: str, + ctx: CompletionContext, ) -> list[str]: - """Complete image references (``tag:NAME`` or UUID). - - When the user types a ``tag:`` prefix, matches are filtered - against the full ``tag:NAME`` candidate. Otherwise bare text - is matched against tag names directly and the completion - inserts the ``tag:`` prefix for the user. - """ - images = self._list_images() - if images is None: - return [] + return complete_command_name(text, ctx) - results: list[str] = [] - for img in images: - tag = img.get("tag") - if isinstance(tag, str) and tag: - prefixed = f"tag:{tag}" - if text.startswith("tag:"): - if prefixed.startswith(text): - results.append(prefixed + " ") - elif tag.startswith(text): - results.append(prefixed + " ") - uuid_str = img.get("uuid") - if isinstance(uuid_str, str) and uuid_str.startswith(text): - results.append(uuid_str + " ") - return results - - def _complete_operation( + def handler_sandbox_dir( self, remaining: tuple[str, ...], text: str, + ctx: CompletionContext, ) -> list[str]: - """Complete operation UUIDs for ``show`` and ``kill``.""" - ops = self._list_operations() - if ops is None: - return [] - results: list[str] = [] - for op in ops: - uuid_str = op.get("uuid") - if isinstance(uuid_str, str) and uuid_str.startswith(text): - results.append(uuid_str + " ") - return results - - def _list_operations(self) -> list[dict[str, object]] | None: - """Fetch recent operations from the API (no caching).""" - if self._client is None: - return None - try: - resp = self.client.get( - "/v1/operations", - params={"limit": "100"}, - ) - data = json.loads(resp.read()) - return data.get("operations", []) # type: ignore[no-any-return] - except Exception: - log.debug("Operation completion failed") - return None + return complete_sandbox_dir(text, ctx) - def _complete_session_name( + def handler_sandbox_path( 
self, remaining: tuple[str, ...], text: str, + ctx: CompletionContext, ) -> list[str]: - """Complete session names for ``session use``.""" - if self._store is None: - return [] - try: - sessions = self._store.list_sessions() - except Exception: - return [] - results: list[str] = [] - for s in sessions: - key = s.session_key - # Match full key - if key.startswith(text): - results.append(key + " ") - # Match suffix (last component after _) - suffix = key.rsplit("_", 1)[-1] if "_" in key else "" - if suffix and suffix != key and suffix.startswith(text): - results.append(suffix + " ") - return results - - def _complete_branch( + return complete_sandbox_path(text, ctx) + + def handler_format( self, remaining: tuple[str, ...], text: str, + ctx: CompletionContext, ) -> list[str]: - """Complete branch names for ``session checkout/branch``.""" - if self._store is None: - return [] - try: - branches = self._store.list_branches() - except Exception: - return [] - return [name + " " for name, _active in branches if name.startswith(text)] - - @property - def client(self) -> ContreeClient: - if self._client is None: - raise RuntimeError("ContreeClient is not set") - return self._client - - @property - def cache(self) -> ImageCache: - if self._store is None: - raise RuntimeError("SessionStore is not set") - return self._store.cache + from contree_cli.output import FORMATTERS - def cached( - self, - key: tuple[str, str], - ) -> list[dict[str, object]] | None: - """Return a cached value or ``None``.""" - result = self.cache.get(key) - return result # type: ignore[return-value] + return [n + " " for n in sorted(FORMATTERS) if n.startswith(text)] - def _list_images(self) -> list[dict[str, object]] | None: - """Fetch image list from the API, with persistent caching.""" - if self._client is None or self._store is None: - return None + # -- helpers --------------------------------------------------------- - cache_key = ("", "images") - cached = self.cached(cache_key) - if cached is not 
None: - return cached + @staticmethod + def looks_like_path(text: str) -> bool: + return "/" in text or text.startswith(".") or text.startswith("~") - try: - resp = self.client.get( - "/v1/images", - params={"limit": "100"}, - ) - data = json.loads(resp.read()) - images: list[dict[str, object]] = data.get("images", []) - self.cache[cache_key] = images - return images - except Exception: - log.debug("Image completion failed") - return None - - def _list_dir( + def list_dir( self, image_uuid: str, dir_path: str, ) -> list[dict[str, object]] | None: - """List a sandbox directory, with persistent caching.""" - cache_key = (image_uuid, f"files:{dir_path}") + """Method-form wrapper around :func:`sources.list_sandbox_dir`. - cached = self.cached(cache_key) - if cached is not None: - return cached + Bound on the instance so callers (including tests) can override + the sandbox listing strategy by patching one attribute. + """ + from contree_cli.shell.sources import list_sandbox_dir + + return list_sandbox_dir(self.context(), image_uuid, dir_path) + + def context(self) -> CompletionContext: + cache = ( + SourceCache(self.store.cache, self.profile_name()) + if self.store is not None + else None + ) + cwd = "" + if self.store is not None: + try: + cwd_value = self.store.get_cwd() + cwd = cwd_value if isinstance(cwd_value, str) else "" + except Exception: + cwd = "" + return CompletionContext( + client=self.client, + store=self.store, + cache=cache, + profile=self.profile_name(), + cwd=cwd, + tokens=tuple(), + list_dir=self.list_dir, + ) + + @staticmethod + def profile_name() -> str: + from contree_cli import PROFILE try: - from contree_cli.client import resolve_image + return PROFILE.get().name + except LookupError: + return "default" - uuid = resolve_image(self.client, image_uuid) - resp = self.client.get( - f"/v1/inspect/{uuid}/list", - params={"path": dir_path}, - ) - data = json.loads(resp.read()) - file_list: list[dict[str, object]] = data.get("files", []) - 
self.cache[cache_key] = file_list - return file_list - except Exception: - log.debug( - "Path completion failed for %s:%s", - image_uuid, - dir_path, - ) - return None + @property + def cache(self) -> ImageCache: + if self.store is None: + raise RuntimeError("SessionStore is not set") + return self.store.cache + + +# --------------------------------------------------------------------------- +# Trie handlers shared across multiple commands +# --------------------------------------------------------------------------- + + +def handler_noop( + remaining: tuple[str, ...], + text: str, + ctx: CompletionContext, +) -> list[str]: + return [] + + +__all__ = [ + "Handler", + "PrefixRouter", + "ShellCompleter", +] diff --git a/contree_cli/shell/repl.py b/contree_cli/shell/repl.py index 51e0a3e..c6ee81b 100644 --- a/contree_cli/shell/repl.py +++ b/contree_cli/shell/repl.py @@ -9,10 +9,11 @@ import sys from functools import cached_property -from contree_cli import FORMATTER, IN_SHELL, SESSION_STORE, ArgumentsProtocol +from contree_cli import FORMATTER, IN_SHELL, PROFILE, SESSION_STORE, ArgumentsProtocol from contree_cli.client import ApiError from contree_cli.output import FORMATTERS, OutputFormatter from contree_cli.session import SessionStore +from contree_cli.shell.cache import SourceCache from contree_cli.shell.completer import ShellCompleter from contree_cli.shell.parser import ShellArgumentParser, ShellParseError from contree_cli.types import Colors @@ -51,6 +52,46 @@ def _readline_safe_prompt(prompt: str) -> str: return ANSI_RE.sub(lambda m: "\x01" + m.group() + "\x02", prompt) +DURATION_RE = re.compile(r"\A(\d+(?:\.\d+)?)([smhd]?)\Z") +DURATION_UNITS = {"": 1, "s": 1, "m": 60, "h": 3600, "d": 86400} + + +def parse_duration(text: str) -> int | None: + """Parse a ``timeout`` duration spec like ``60``, ``30s``, ``5m``, ``1h``. 
+ + Returns the duration in whole seconds, or ``None`` if *text* is not a + valid spec (in which case the caller should fall through and treat the + user input as a regular command line). + """ + match = DURATION_RE.match(text) + if match is None: + return None + value = float(match.group(1)) + multiplier = DURATION_UNITS[match.group(2)] + seconds = int(value * multiplier) + if seconds <= 0: + return None + return seconds + + +def intercept_timeout(line: str) -> tuple[int, str] | None: + """Detect ``timeout DURATION COMMAND...`` and split off the duration. + + Returns ``(seconds, remainder_line)`` when *line* starts with the + ``timeout`` builtin followed by a parseable duration and at least one + further token; ``None`` otherwise. Whitespace and quoting in the + remainder are preserved verbatim so ``sh -c`` sees the same expression + the user typed. + """ + parts = line.split(None, 2) + if len(parts) < 3 or parts[0] != "timeout": + return None + seconds = parse_duration(parts[1]) + if seconds is None: + return None + return seconds, parts[2] + + # Bare names that are forwarded as contree management commands. CONTREE_ALIASES = frozenset({"ls", "cat"}) @@ -91,6 +132,20 @@ def _readline_safe_prompt(prompt: str) -> str: " help run help for the run command" ), "clear": "Usage: clear\n\nClear the terminal screen.", + "timeout": ( + "Usage: timeout DURATION COMMAND...\n" + "\n" + "Run COMMAND in the sandbox with the API operation timeout set\n" + "to DURATION. Mirrors the convention of the GNU 'timeout'\n" + "binary but enforces the limit server-side instead of spawning\n" + "a local one.\n" + "\n" + "DURATION is an integer or float, optionally followed by a\n" + "unit suffix: s (seconds, default), m (minutes), h (hours),\n" + "d (days). When DURATION cannot be parsed, the line is sent to\n" + "the sandbox unmodified so the in-image 'timeout' binary still\n" + "works for advanced cases (signals, --kill-after, etc)." 
+ ), "exit": "Usage: exit | quit\n\nExit the interactive shell (Ctrl-D also works).", "--format": ( "Usage: --format [NAME] | -f [NAME]\n" @@ -303,13 +358,74 @@ def execute(self, line: str) -> None: resolved = [cmd] + [self.resolve_path(a) for a in args] self.dispatch_contree(resolved) else: - self.dispatch_run(tokens) + self.dispatch_run(line) case "vim" | "vi" | "nvim" | "nano": self.dispatch_edit(cmd, tokens[1:]) case "contree": self.dispatch_contree(tokens[1:]) + case "timeout": + intercepted = intercept_timeout(line) + if intercepted is None: + self.dispatch_run(line) + else: + seconds, remainder = intercepted + self.dispatch_run(remainder, timeout=seconds) case _: - self.dispatch_run(tokens) + self.dispatch_run(line) + + def session_snapshot(self) -> tuple[str, str, str, str]: + """Capture the state we watch for completion-cache invalidation.""" + try: + profile = PROFILE.get().name + except LookupError: + profile = "default" + try: + session = self.session_store.session + except (LookupError, Exception): + session = None + if session is None: + return profile, self.session_store.session_key, "", "" + return ( + profile, + session.session_key, + session.current_image, + session.active_branch, + ) + + def invalidate_completion_cache( + self, + before: tuple[str, str, str, str], + after: tuple[str, str, str, str], + ) -> None: + """Bust completion caches when watched session state changed.""" + profile_before, key_before, image_before, branch_before = before + profile_after, key_after, image_after, branch_after = after + try: + cache = SourceCache(self.session_store.cache, profile_after) + except (LookupError, Exception): + return + + if profile_before != profile_after: + cache.invalidate_all() + with contextlib.suppress(Exception): + SourceCache( + self.session_store.cache, + profile_before, + ).invalidate_all() + return + + if key_before != key_after: + cache.invalidate_kind_prefix("") + return + + if image_before != image_after: + cache.invalidate("", 
"images") + cache.invalidate_scope(image_before) + cache.invalidate_scope(image_after) + cache.invalidate("", "operations") + + if branch_before != branch_after: + cache.invalidate_kind_prefix("branches") def dispatch_contree(self, tokens: list[str]) -> None: """Dispatch a contree management command via argparse.""" @@ -348,6 +464,7 @@ def dispatch_contree(self, tokens: list[str]) -> None: formatter = FORMATTER.get() + before = self.session_snapshot() try: handler(loader.from_args(ns)) except ApiError as exc: @@ -363,14 +480,30 @@ def dispatch_contree(self, tokens: list[str]) -> None: formatter.flush() if fmt_token is not None: FORMATTER.reset(fmt_token) + self.invalidate_completion_cache(before, self.session_snapshot()) - def dispatch_run(self, tokens: list[str]) -> None: - """Dispatch tokens as an implicit ``run`` in the sandbox.""" + def dispatch_run(self, line: str, *, timeout: int | None = None) -> None: + """Dispatch a raw input line as an implicit ``run`` in the sandbox. + + The line is forwarded verbatim as a single shell expression so the + remote ``sh -c`` sees operators like ``|``, ``;``, ``&&`` as the + user typed them, rather than as quoted literal tokens. + + ``timeout`` overrides the API operation timeout. When ``None`` the + server falls back to its default. Set explicitly by the ``timeout`` + shell builtin (``timeout 60 long-build``). 
+ """ from contree_cli.cli.run import RunArgs, cmd_run - args = RunArgs(command_args=tokens, shell=True, cwd=self.cwd) + args = RunArgs( + command_args=[line], + shell=True, + cwd=self.cwd, + timeout=timeout, + ) formatter = FORMATTER.get() + before = self.session_snapshot() try: cmd_run(args) except ApiError as exc: @@ -383,6 +516,7 @@ def dispatch_run(self, tokens: list[str]) -> None: log.error("Command failed: %s", exc, exc_info=True) finally: formatter.flush() + self.invalidate_completion_cache(before, self.session_snapshot()) def dispatch_edit(self, editor: str, args: list[str]) -> None: """Open a sandbox file in a host editor via ``file edit``.""" diff --git a/contree_cli/shell/sources.py b/contree_cli/shell/sources.py new file mode 100644 index 0000000..116c98a --- /dev/null +++ b/contree_cli/shell/sources.py @@ -0,0 +1,491 @@ +"""Named completion sources for the shell. + +Each source produces candidate completions for one kind of value +(image, operation, branch, profile, sandbox path, ...). Sources are +keyed by short name and looked up by the completer via +:mod:`contree_cli.shell.argmap`, which maps ``(command_path, dest)`` +pairs to source names. + +Sources accept ``client=None`` and ``store=None`` and return ``[]`` so +they remain safe in tests and during partial setup. 
+""" + +from __future__ import annotations + +import json +import logging +import os +import posixpath +import re +from collections.abc import Callable, Iterable +from dataclasses import dataclass +from typing import TYPE_CHECKING + +from contree_cli.config import Config +from contree_cli.mapped_file import split_mapped_value +from contree_cli.output import FORMATTERS +from contree_cli.shell.cache import SourceCache + +if TYPE_CHECKING: + from contree_cli.client import ContreeClient + from contree_cli.session import SessionStore + +log = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Context passed to sources at completion time +# --------------------------------------------------------------------------- + + +SandboxListFn = Callable[ + [str, str], + "list[dict[str, object]] | None", +] + + +@dataclass(frozen=True) +class CompletionContext: + client: ContreeClient | None + store: SessionStore | None + cache: SourceCache | None + profile: str + cwd: str + tokens: tuple[str, ...] + list_dir: SandboxListFn | None = None + + +SourceFn = Callable[[str, CompletionContext], list[str]] + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +EDITOR_ALIASES: tuple[str, ...] = ("vim", "vi", "nvim", "nano") +SKILL_SPEC_PREFIXES: tuple[str, ...] = ( + "claude:", + "claude:~", + "codex:~", + "opencode:~", + "amp:~", + "cline:~", + "claude-subagent:", + "claude-agent:", +) +TAG_LETTERS: tuple[str, ...] = ("u", "g", "m") +PS_KIND_CHOICES: tuple[str, ...] = ("instance", "import", "delete") +PS_STATUS_CHOICES: tuple[str, ...] 
= ( + "PENDING", + "ASSIGNED", + "EXECUTING", + "SUCCESS", + "FAILED", + "CANCELLED", +) + + +def with_trailing_space(names: Iterable[str], text: str) -> list[str]: + return [n + " " for n in names if n.startswith(text)] + + +# --------------------------------------------------------------------------- +# Live data sources +# --------------------------------------------------------------------------- + + +def fetch_images(ctx: CompletionContext) -> list[dict[str, object]]: + """Return the list of images (cached, profile-namespaced).""" + if ctx.client is None: + return [] + if ctx.cache is not None: + cached = ctx.cache.get(scope="", kind="images", ttl=60.0) + if isinstance(cached, list): + return cached + try: + resp = ctx.client.get("/v1/images", params={"limit": "100"}) + data = json.loads(resp.read()) + images: list[dict[str, object]] = data.get("images", []) + except Exception: + log.debug("image source: API call failed", exc_info=True) + return [] + if ctx.cache is not None: + ctx.cache.set(scope="", kind="images", value=images) + return images + + +def complete_image(text: str, ctx: CompletionContext) -> list[str]: + """Complete image references: ``tag:NAME`` or UUID. + + Bare text matching a tag name auto-prefixes the candidate with + ``tag:``; explicit ``tag:`` text filters by the full candidate. 
+ """ + images = fetch_images(ctx) + results: list[str] = [] + for img in images: + tag = img.get("tag") + if isinstance(tag, str) and tag: + prefixed = f"tag:{tag}" + if text.startswith("tag:"): + if prefixed.startswith(text): + results.append(prefixed + " ") + elif tag.startswith(text): + results.append(prefixed + " ") + uuid_str = img.get("uuid") + if isinstance(uuid_str, str) and uuid_str.startswith(text): + results.append(uuid_str + " ") + return results + + +def fetch_operations(ctx: CompletionContext) -> list[dict[str, object]]: + """Return recent operations (cached short TTL, profile-namespaced).""" + if ctx.client is None: + return [] + if ctx.cache is not None: + cached = ctx.cache.get(scope="", kind="operations", ttl=5.0) + if isinstance(cached, list): + return cached + ops: list[dict[str, object]] + try: + resp = ctx.client.get("/v1/operations", params={"limit": "100"}) + data = json.loads(resp.read()) + if isinstance(data, dict): + ops = list(data.get("operations", [])) + elif isinstance(data, list): + ops = list(data) + else: + ops = [] + except Exception: + log.debug("operation source: API call failed", exc_info=True) + return [] + if ctx.cache is not None: + ctx.cache.set(scope="", kind="operations", value=ops) + return ops + + +def complete_operation(text: str, ctx: CompletionContext) -> list[str]: + ops = fetch_operations(ctx) + results: list[str] = [] + for op in ops: + uuid_str = op.get("uuid") + if isinstance(uuid_str, str) and uuid_str.startswith(text): + results.append(uuid_str + " ") + return results + + +def complete_session(text: str, ctx: CompletionContext) -> list[str]: + """Complete session keys plus their underscore-suffix shorthand.""" + if ctx.store is None: + return [] + try: + sessions = ctx.store.list_sessions() + except Exception: + log.debug("session source: store call failed", exc_info=True) + return [] + results: list[str] = [] + for s in sessions: + key = s.session_key + if key.startswith(text): + results.append(key + " ") + 
def list_sandbox_dir(
    ctx: CompletionContext,
    image_uuid: str,
    dir_path: str,
) -> list[dict[str, object]] | None:
    """List one sandbox directory via ``/v1/inspect/{uuid}/list``, with caching.

    Results are cached per image under the kind ``files:<dir_path>`` with a
    30 second TTL. ``None`` is returned when there is no client or when the
    lookup fails, so the caller can short-circuit.
    """
    client = ctx.client
    if client is None:
        return None

    cache = ctx.cache
    kind = f"files:{dir_path}"
    if cache is not None:
        hit = cache.get(scope=image_uuid, kind=kind, ttl=30.0)
        if isinstance(hit, list):
            return hit

    try:
        # Imported inside the try so an import failure is handled like any
        # other lookup failure.
        from contree_cli.client import resolve_image

        resolved = resolve_image(client, image_uuid)
        response = client.get(
            f"/v1/inspect/{resolved}/list",
            params={"path": dir_path},
        )
        payload = json.loads(response.read())
        entries: list[dict[str, object]] = payload.get("files", [])
    except Exception:
        log.debug("sandbox source: API call failed", exc_info=True)
        return None

    if cache is not None:
        cache.set(scope=image_uuid, kind=kind, value=entries)
    return entries
def complete_host_path(text: str, ctx: CompletionContext) -> list[str]:
    """Complete a local filesystem path. Honours ``~`` and ``./``.

    Candidates keep the directory part exactly as the user typed it (so
    ``~/pro`` completes to ``~/project/``, not to the expanded home path).
    Directories end with ``/`` so typing can continue; everything else ends
    with a space. Symlinks are not followed when deciding what is a
    directory. An unreadable or missing directory yields ``[]``.

    *ctx* is unused; it is accepted so the function matches the common
    source signature.
    """
    expanded = os.path.expanduser(text) if text.startswith("~") else text

    if "/" not in text:
        if expanded != text:
            # Bare "~" / "~user": the expansion is itself the directory. Scanning
            # its parent would replace what the user typed with the home
            # directory's basename, so just offer the slash-terminated form.
            return [text + "/"] if os.path.isdir(expanded) else []
        directory, prefix, user_directory = ".", text, ""
    else:
        # expanduser only rewrites the part before the first "/", so the
        # expanded string still contains the separator.
        cut = expanded.rindex("/") + 1
        directory, prefix = expanded[:cut], expanded[cut:]
        user_directory = text[: text.rindex("/") + 1]

    try:
        with os.scandir(directory) as scanner:
            entries = list(scanner)
    except OSError:
        return []

    results: list[str] = []
    for entry in entries:
        name = entry.name
        if not name.startswith(prefix):
            continue
        try:
            is_dir = entry.is_dir(follow_symlinks=False)
        except OSError:
            # is_dir may raise (e.g. PermissionError); offer it as a plain entry.
            is_dir = False
        results.append(user_directory + name + ("/" if is_dir else " "))
    return results
def complete_skill_spec(text: str, ctx: CompletionContext) -> list[str]:
    """Complete a skill spec: known spec prefixes first, host paths as fallback.

    When no entry of ``SKILL_SPEC_PREFIXES`` starts with *text* and *text*
    looks like a path (contains ``/`` or starts with ``.``), completion is
    delegated to ``complete_host_path``.
    """
    matches = list(
        filter(lambda known: known.startswith(text), SKILL_SPEC_PREFIXES)
    )
    if matches:
        return matches
    looks_like_path = text.startswith(".") or "/" in text
    if looks_like_path:
        return complete_host_path(text, ctx)
    return matches
def complete_command_name(text: str, ctx: CompletionContext) -> list[str]:
    """Complete the first word of a shell line.

    Candidates are the union of the registered command names, the shell
    builtins and a few shell-level aliases, sorted and de-duplicated, each
    followed by a space.
    """
    from contree_cli.shell.parser import get_command_names

    # "timeout" is listed as a shell builtin in docs/commands/shell.md, so it
    # is offered here alongside the other builtins.
    shell_builtins = (
        "cd",
        "pwd",
        "history",
        "help",
        "clear",
        "timeout",
        "exit",
        "quit",
    )
    aliases = ("ls", "cat", "vim", "vi", "nvim", "nano", "--format", "-f")
    names = sorted({*get_command_names(), *shell_builtins, *aliases})
    return with_trailing_space(names, text)
## See also - {doc}`ps` -- list operations to find UUIDs +- {doc}`operation` -- multi-UUID variant: `contree op cancel UUID1 UUID2 ...` - {doc}`run` -- Ctrl-C during `contree run` also cancels the operation diff --git a/docs/commands/operation.md b/docs/commands/operation.md new file mode 100644 index 0000000..5878cae --- /dev/null +++ b/docs/commands/operation.md @@ -0,0 +1,123 @@ +# operation (op) + +Manage operations under a single namespace. Aggregates `ps` (list), +`show` (inspect), and `kill` (cancel), and adds **multi-UUID support** to +`show` and `cancel` so several operations can be acted on in one call. + +`op` is the short alias. + +## Subcommands + +| Subcommand | Aliases | Description | +|------------|---------|-------------| +| `list` | `ls` | List operations. Same flags as `contree ps`. | +| `show UUID [UUID...]` | -- | Show one or more operation results. | +| `cancel UUID [UUID...]` | -- | Cancel one or more operations (or `--all`). | + +## Examples + +```bash +# List active operations (same as `contree ps`) +contree op list +contree op ls +contree op ls -a -S FAILED # all flags from ps are accepted + +# Inspect a single operation +contree op show 3f2a7b... + +# Inspect several operations at once +contree op show 3f2a7b... a1b2c3... 9d8e7f... + +# History references (inherited from `contree show`) +contree op show @5 @4 @3 + +# Cancel one or more operations +contree op cancel 3f2a7b... +contree op cancel a1b2c3... 9d8e7f... + +# Cancel every active operation +contree op cancel --all +``` + +## Help output + +```{terminal-shell} contree op --help +``` + +```{terminal-shell} contree op list --help +``` + +```{terminal-shell} contree op show --help +``` + +```{terminal-shell} contree op cancel --help +``` + +## `op list` -- dynamic columns + +The listing renders **every scalar top-level field** the API returns, +not a hard-coded subset. 
When the server adds a new field (for example +`cost`, `project_id`, `started_at`), it appears in the output without a +CLI release. Nested structures (`metadata`, `result`, `tags`) are +filtered out -- use `op show UUID` for the detail view. + +Known fields are lightly typed: + +| Field | Transform | +|-------|-----------| +| `created_at`, `started_at`, `finished_at`, `updated_at` | parsed to UTC datetime | +| `duration` | wrapped as `timedelta` (`total_seconds()` in JSON) | +| `error` | `None` is rendered as empty string | + +Column order follows the API response, with one exception: **`error` +is pinned to the last column**. Long free-form error messages would +otherwise push the rest of the row out of alignment. + +## `op show` -- multiple UUIDs + +Each UUID is fetched and rendered through the same code path as +`contree show`, so cached terminal results and `@N` history references +work uniformly. On API errors (e.g. 404 for an unknown UUID), the +command logs the failure and continues with the remaining UUIDs, exiting +with status `1` at the end. + +:::{note} +With table output (`-f table`) and several UUIDs, each operation +currently renders as its own mini-table. Use `default` or `json` for a +unified stream view across multiple UUIDs. +::: + +## `op cancel` -- multiple UUIDs or `--all` + +Either pass UUIDs explicitly or use `--all` to cancel every active +operation (`PENDING`, `ASSIGNED`, `EXECUTING`). Combining both is allowed: +`--all` wins, and the explicit UUIDs are ignored with a `WARNING`. As +with `op show`, errors on individual UUIDs do not abort the run; the +command exits `1` if any cancellation failed. + +```bash +# Mixed: --all still wins, "ignored-1" is not cancelled +contree op cancel --all ignored-1 +``` + +## Comparison with the top-level commands + +`contree op` does not replace `ps`/`show`/`kill` -- those keep their +single-target semantics. 
The new namespace exists for grouping and for +multi-UUID workflows: + +| Need | Use | +|------|-----| +| List active operations | `contree ps` *or* `contree op ls` | +| Inspect one operation | `contree show UUID` *or* `contree op show UUID` | +| Inspect multiple | `contree op show UUID1 UUID2 ...` | +| Cancel one operation | `contree kill UUID` *or* `contree op cancel UUID` | +| Cancel multiple | `contree op cancel UUID1 UUID2 ...` | +| Cancel everything active | `contree kill --all` *or* `contree op cancel --all` | + +## See also + +- {doc}`ps` -- single-purpose list command (delegated to by `op list`) +- {doc}`show` -- single-UUID inspect (delegated to by `op show`) +- {doc}`kill` -- single-UUID cancel +- {doc}`run` -- the command that creates operations diff --git a/docs/commands/ps.md b/docs/commands/ps.md index d103c4a..9c40539 100644 --- a/docs/commands/ps.md +++ b/docs/commands/ps.md @@ -46,8 +46,15 @@ contree ps -q | xargs -I {} contree show {} Without `-a`, only `PENDING`, `ASSIGNED`, and `EXECUTING` are shown. +## Dynamic output columns + +`ps` renders every scalar top-level field the API returns (not a fixed +subset), so new server fields appear automatically. `error` is pinned +to the last column. See {doc}`operation` for the full description. 
+ ## See also - {doc}`show` -- inspect a specific operation - {doc}`kill` -- cancel a running operation +- {doc}`operation` -- group + multi-UUID variants (`contree op ls/show/cancel`) - {doc}`/tutorial/workflows` -- monitoring and scripting patterns diff --git a/docs/commands/shell.md b/docs/commands/shell.md index 60b2ef4..f0a2316 100644 --- a/docs/commands/shell.md +++ b/docs/commands/shell.md @@ -50,7 +50,7 @@ contree session branch experiment contree run -e DEBUG=1 -- ./app ``` -**Builtins** — handled locally by the shell: +**Builtins** -- handled locally by the shell: | Builtin | Description | |---------|-------------| @@ -59,6 +59,7 @@ contree run -e DEBUG=1 -- ./app | `history [N]` | Show command history (optional limit) | | `help [TOPIC]` | Show help (optionally for a specific command) | | `clear` | Clear the terminal screen | +| `timeout DURATION CMD...` | Run `CMD...` with the API operation timeout set to `DURATION` | | `--format NAME` / `-f NAME` | Change output format (or show current if no argument) | | `exit` / `quit` | Exit the shell (also Ctrl-D) | @@ -78,6 +79,61 @@ contree run -e DEBUG=1 -- ./app files exist or when args contain flags or glob characters. ::: +## Implicit run: shell-expression passthrough + +Bare commands are forwarded to the sandbox as a single shell expression with +`shell=True`. The entire input line is sent verbatim to the remote `sh -c`, +so operators like `|`, `;`, `&&`, `||`, `>`, `<` are interpreted by the +remote shell exactly as typed: + +```text +contree:/> mount | grep cgroup +contree:/> echo 1 ; echo 2 +contree:/> apt-get update && apt-get install -y curl +contree:/> uname -a > /tmp/info.txt +``` + +There is no local tokenize/rejoin step, so quoting is preserved: + +```text +contree:/> python3 -c "print('hello world')" +``` + +## `timeout` builtin + +The shell recognises `timeout DURATION CMD...` and sets the server-side +operation timeout to `DURATION` instead of running the GNU `timeout` binary +inside the sandbox. 
The kill is enforced by the API, not by a wrapper +process, so the operation surfaces a warning when the limit is hit: + +```text +contree:/> timeout 30 apk add gcc +contree:/> timeout 5m make build +contree:/> timeout 1h python long_train.py +``` + +`DURATION` is an integer or decimal optionally followed by a unit suffix: + +| Suffix | Meaning | +|--------|---------| +| (none) | Seconds | +| `s` | Seconds | +| `m` | Minutes | +| `h` | Hours | +| `d` | Days | + +If `DURATION` is not a valid spec (for example `timeout --kill-after=5 30 cmd` +or `timeout --help`), the shell falls through and sends the line to the +sandbox unchanged, so the in-image `timeout` binary still handles advanced +flags. + +When the limit is hit, the response carries `state.timed_out=true` and the +shell logs: + +```text +WARNING: Operation timed out after 30s +``` + ## Tab completion The shell provides context-aware tab completion for almost everything @@ -129,8 +185,9 @@ preserving the newline inside the quoted string. sandbox (works for remote commands, not for contree output). - **No job control**: No `&`, `bg`, `fg`, or Ctrl-Z. Use `contree run -d` for background tasks. -- **Bare commands use defaults**: No way to pass `--timeout`, `--env`, or - `--file` without the explicit `contree run` prefix. +- **Bare commands use defaults**: `--env`, `--file`, `--disposable`, and + `--detach` require the explicit `contree run` prefix. The operation + timeout has a shorthand: `timeout DURATION CMD...` (see above). - **No `~` or glob expansion**: Passed as-is to the sandbox. - **Cannot nest shells**: Running `contree shell` inside a shell is not supported. 
diff --git a/docs/commands/show.md b/docs/commands/show.md index 7d82687..6ca7fc6 100644 --- a/docs/commands/show.md +++ b/docs/commands/show.md @@ -40,4 +40,5 @@ For `csv`, `tsv`, and `table` formats, stdout/stderr are omitted -- use ## See also - {doc}`ps` -- list operations to find UUIDs +- {doc}`operation` -- multi-UUID variant: `contree op show UUID1 UUID2 ...` - {doc}`run` -- the command that creates operations diff --git a/docs/tutorial/shell.md b/docs/tutorial/shell.md index 7c49348..0c9696c 100644 --- a/docs/tutorial/shell.md +++ b/docs/tutorial/shell.md @@ -39,11 +39,23 @@ contree:/> apt-get install -y curl contree:/> curl https://example.com ``` -Each command is an implicit `contree run` with `shell=True` -- pipes, -redirects, and `&&` chains all work: +Each command is an implicit `contree run` with `shell=True`. The whole +input line is forwarded verbatim to the remote `sh -c`, so pipes, +redirects, `;`, `&&`, and `||` are interpreted by the sandbox shell +exactly as typed: ```text contree:/> echo hello && ls / | head -5 +contree:/> mount | grep cgroup +contree:/> echo 1 ; echo 2 +contree:/> uname -a > /tmp/info.txt +``` + +Quoting from your local prompt is also preserved through to the remote +shell: + +```text +contree:/> python3 -c "print('hello world')" ``` You can also use the explicit form, which is equivalent: @@ -269,6 +281,30 @@ Clear the terminal screen: contree:/> clear ``` +### `timeout` + +Run a command with a server-enforced operation timeout. Mirrors the GNU +`timeout` convention but sets `payload.timeout` on the API request instead +of spawning a local wrapper inside the sandbox: + +```text +contree:/> timeout 30 apk add gcc +contree:/> timeout 5m make build +contree:/> timeout 1h python long_train.py +``` + +`DURATION` accepts a bare integer or decimal (seconds by default) and +the suffixes `s`, `m`, `h`, `d`. 
When the value cannot be parsed, the +shell forwards the line untouched so the in-image `timeout` binary still +handles advanced flags like `--kill-after` or `-s SIGTERM`. + +When the limit fires, the API returns `state.timed_out=true` (status may +still be `SUCCESS` with `signal=9`), and the shell logs: + +```text +WARNING: Operation timed out after 30s +``` + ### `--format` / `-f` Change the output format mid-session, or show the current format: @@ -306,8 +342,9 @@ contree:/app> contree tag UUID my-app:v1 not interpreted locally. - **No job control** -- no `&`, `bg`, `fg`, or Ctrl-Z. Use `contree run -d` for detached execution. -- **Bare commands use defaults** -- you cannot pass `--timeout`, `--env`, - `--file`, or `--disposable` without the explicit `contree run` prefix. +- **Bare commands use defaults** -- you cannot pass `--env`, `--file`, or + `--disposable` without the explicit `contree run` prefix. The operation + timeout has a shell shortcut: `timeout DURATION CMD...` (see above). - **No `~` or glob expansion** -- these tokens are passed as-is to the sandbox. - **Image list cache** -- newly created images during a session won't appear diff --git a/tests/conftest.py b/tests/conftest.py index 4e3854f..7555d29 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -9,6 +9,7 @@ import pytest +import contree_cli.arguments # noqa: F401 populates COMMAND_REGISTRY import contree_cli.config as config_mod from contree_cli import CLIENT, PROFILE from contree_cli.client import ContreeClient, ContreeIAMClient diff --git a/tests/test_argmap.py b/tests/test_argmap.py new file mode 100644 index 0000000..abb3d26 --- /dev/null +++ b/tests/test_argmap.py @@ -0,0 +1,150 @@ +"""Smoke tests for the shell completion (command_path, dest) registry. + +Guards against three regressions: + +1. Every source name referenced in :mod:`contree_cli.shell.argmap` must + exist in :mod:`contree_cli.shell.sources`.``SOURCES``. +2. 
def find_action(
    parser: argparse.ArgumentParser,
    command_path: tuple[str, ...],
    dest: str,
) -> argparse.Action | None:
    """Descend *command_path* through the subparsers and find an action by dest.

    Returns ``None`` when a path segment has no subparsers group, when the
    segment is not one of its choices, or when the final parser has no
    action whose ``dest`` equals *dest*.
    """
    node = parser
    for segment in command_path:
        group = find_subparsers(node)
        if group is None or segment not in group.choices:
            return None
        node = group.choices[segment]
        assert isinstance(node, argparse.ArgumentParser)
    return next((act for act in node._actions if act.dest == dest), None)
+ ) + + def test_lookup_returns_none_for_unknown_key(self): + assert lookup(("nonexistent",), "foo") is None + assert lookup((), "no-such-flag") is None + + +class TestAliasDispatch: + """``op show`` and ``operation show`` resolve to the same registry entry.""" + + def test_op_alias_walks_to_canonical_operation(self): + result = walk(root_parser, ["op", "show", "uuid-1"]) + assert result.command_path == ("operation", "show") + + def test_list_alias_walks_to_canonical(self): + result = walk(root_parser, ["operation", "ls"]) + assert result.command_path == ("operation", "list") + + def test_session_aliases_normalised(self): + # `co` is the alias for `checkout` + result = walk(root_parser, ["session", "co", "main"]) + assert result.command_path == ("session", "checkout") + + def test_op_show_resolves_operation_source(self): + # Walking via the alias plus argmap lookup must find the "operation" source. + result = walk(root_parser, ["op", "show"]) + assert lookup(result.command_path, "uuids") == "operation" + + def test_kill_uuid_resolves_operation_source(self): + result = walk(root_parser, ["kill"]) + assert lookup(result.command_path, "uuid") == "operation" + + def test_run_use_flag_resolves_image_source(self): + # `--use` is a flag, walk just records it; positional/flag-value + # resolution still hits the same `(("run",), "use")` entry. 
+ walk(root_parser, ["run", "--use"]) + assert lookup(("run",), "use") == "image" + + +class TestRepresentativeMappings: + """Spot-check a handful of registry entries end-to-end.""" + + @pytest.mark.parametrize( + "command_path,dest,expected", + [ + (("show",), "uuid", "operation"), + (("use",), "image", "image"), + (("tag",), "args", "image"), + (("ls",), "path", "sandbox-path"), + (("cd",), "path", "sandbox-dir"), + (("cp",), "dest", "host-path"), + (("env",), "vars", "env-key"), + (("run",), "command_args", "sandbox-path"), + (("run",), "cwd", "sandbox-dir"), + (("run",), "file", "mapped-file"), + (("file", "edit"), "path", "sandbox-path"), + (("file", "edit"), "editor", "editor"), + (("file", "cp"), "src", "host-path"), + (("session", "checkout"), "checkout_branch", "branch"), + (("session", "branch"), "from_branch", "branch"), + (("session", "wait"), "op_ids", "operation"), + (("operation", "show"), "uuids", "operation"), + (("operation", "cancel"), "uuids", "operation"), + (("auth", "switch"), "profile_name", "profile"), + (("skill", "install"), "specs", "skill-spec"), + ((), "profile", "profile"), + ], + ) + def test_mapping(self, command_path, dest, expected): + assert lookup(command_path, dest) == expected + + +class TestCliFilesHaveNoShellImports: + """Layering: cli/ must not depend on shell/.""" + + def test_no_argspec_imports_in_cli(self): + import pathlib + + repo = pathlib.Path(__file__).resolve().parent.parent + cli_dir = repo / "contree_cli" / "cli" + offenders: list[str] = [] + for path in sorted(cli_dir.glob("*.py")): + text = path.read_text(encoding="utf-8") + if "contree_cli.shell.argspec" in text: + offenders.append(str(path.relative_to(repo))) + assert not offenders, f"cli/ must not import from shell.argspec: {offenders}" diff --git a/tests/test_operation.py b/tests/test_operation.py new file mode 100644 index 0000000..bb87b72 --- /dev/null +++ b/tests/test_operation.py @@ -0,0 +1,307 @@ +from __future__ import annotations + +from contextvars 
import copy_context + +import pytest +from conftest import ContreeTestClient + +from contree_cli import CLIENT, FORMATTER, SESSION_STORE +from contree_cli.arguments import parser +from contree_cli.cli.kill import ACTIVE_STATUSES +from contree_cli.cli.operation import ( + CancelArgs, + ShowMultiArgs, + cmd_cancel, + cmd_show_multi, +) +from contree_cli.output import CSVFormatter, JSONFormatter +from contree_cli.session import SessionStore + + +def make_op( + uuid: str = "op-1", + *, + status: str = "SUCCESS", + kind: str = "instance", + duration: float = 1.5, + error: str | None = None, + image: str = "img-1", + tag: str = "latest", +) -> dict: + return { + "uuid": uuid, + "kind": kind, + "status": status, + "error": error, + "duration": duration, + "metadata": {"result": None}, + "result": {"image": image, "tag": tag, "duration": None}, + "created_at": "2025-06-01T00:00:00Z", + } + + +def run_show_multi( + tc: ContreeTestClient, + ops: list[dict], + *, + formatter=None, + store: SessionStore, +) -> int | None: + for op in ops: + tc.respond_json(op) + FORMATTER.set(formatter or CSVFormatter()) + SESSION_STORE.set(store) + ctx = copy_context() + args = ShowMultiArgs(uuids=[op["uuid"] for op in ops]) + return ctx.run(cmd_show_multi, args) + + +def run_cancel( + tc: ContreeTestClient, + *, + uuids: list[str] | None = None, + all_flag: bool = False, + list_pages: list[list[dict]] | None = None, + delete_statuses: list[int] | None = None, +) -> int | None: + if list_pages is not None: + for page in list_pages: + tc.respond_json(page) + for status in delete_statuses or []: + tc.respond(status=status, body=b"") + CLIENT.set(tc) + ctx = copy_context() + args = CancelArgs(uuids=uuids or [], all=all_flag) + return ctx.run(cmd_cancel, args) + + +# ---------------------------------------------------------------------- +# argparse wiring +# ---------------------------------------------------------------------- + + +class TestArgparseWiring: + def 
test_op_alias_resolves_to_operation(self): + ns = parser.parse_args(["op", "ls"]) + assert ns.command in ("operation", "op") + assert ns.operation_action == "ls" + + def test_show_requires_at_least_one_uuid(self, capsys): + with pytest.raises(SystemExit): + parser.parse_args(["op", "show"]) + err = capsys.readouterr().err + assert "uuids" in err.lower() or "required" in err.lower() + + def test_show_accepts_multiple_uuids(self): + ns = parser.parse_args(["op", "show", "a", "b", "c"]) + assert ns.uuids == ["a", "b", "c"] + assert ns.handler is cmd_show_multi + + def test_cancel_accepts_multiple_uuids(self): + ns = parser.parse_args(["op", "cancel", "x", "y"]) + assert ns.uuids == ["x", "y"] + assert ns.all is False + assert ns.handler is cmd_cancel + + def test_cancel_all_flag(self): + ns = parser.parse_args(["op", "cancel", "--all"]) + assert ns.all is True + assert ns.uuids == [] + + def test_list_delegates_to_ps_handler(self): + from contree_cli.cli.ps import cmd_ps + + ns = parser.parse_args(["op", "list", "-q"]) + assert ns.handler is cmd_ps + assert ns.quiet is True + + def test_list_ls_alias(self): + from contree_cli.cli.ps import cmd_ps + + ns = parser.parse_args(["op", "ls"]) + assert ns.handler is cmd_ps + + +# ---------------------------------------------------------------------- +# op show +# ---------------------------------------------------------------------- + + +class TestOperationShow: + def test_show_single_uuid(self, contree_client, session_store, capsys): + rc = run_show_multi( + contree_client, + [make_op("op-a")], + formatter=JSONFormatter(), + store=session_store, + ) + assert rc is None + out = capsys.readouterr().out + assert "op-a" in out + assert contree_client.request_count == 1 + + def test_show_multiple_uuids_issues_one_get_per_uuid( + self, contree_client, session_store, capsys + ): + ops = [make_op("op-a"), make_op("op-b"), make_op("op-c")] + rc = run_show_multi( + contree_client, + ops, + formatter=JSONFormatter(), + 
store=session_store, + ) + assert rc is None + assert contree_client.request_count == 3 + out = capsys.readouterr().out + assert "op-a" in out + assert "op-b" in out + assert "op-c" in out + # All three are GETs on /v1/operations/{uuid} + for i, op in enumerate(ops): + req = contree_client.get_request(i) + assert req.method == "GET" + assert req.path == f"/v1/operations/{op['uuid']}" + + def test_show_continues_on_api_error( + self, contree_client, session_store, caplog, capsys + ): + # First UUID -> 404, then a successful one + contree_client.respond(status=404, body=b"not found") + contree_client.respond_json(make_op("op-b")) + + FORMATTER.set(JSONFormatter()) + SESSION_STORE.set(session_store) + ctx = copy_context() + args = ShowMultiArgs(uuids=["op-a", "op-b"]) + + with caplog.at_level("ERROR"): + rc = ctx.run(cmd_show_multi, args) + + assert rc == 1 + assert "Failed to fetch op-a" in caplog.text + out = capsys.readouterr().out + # Second UUID still got rendered + assert "op-b" in out + + def test_show_history_reference_uses_session_store( + self, contree_client, session_store + ): + # Seed a history entry tied to a known op UUID, then reference it as @2 + session_store.set_image("img-1", kind="use", title="use img-1") + session_store.set_image( + "img-2", + kind="run", + title="echo hi", + operation_uuid="op-from-history", + ) + contree_client.respond_json(make_op("op-from-history")) + + FORMATTER.set(CSVFormatter()) + SESSION_STORE.set(session_store) + ctx = copy_context() + args = ShowMultiArgs(uuids=["@2"]) + rc = ctx.run(cmd_show_multi, args) + + assert rc is None + assert contree_client.request_count == 1 + assert contree_client.get_request(0).path == "/v1/operations/op-from-history" + + +# ---------------------------------------------------------------------- +# op cancel +# ---------------------------------------------------------------------- + + +class TestOperationCancel: + def test_cancel_single_uuid(self, contree_client, caplog): + with
caplog.at_level("INFO"): + rc = run_cancel( + contree_client, + uuids=["op-a"], + delete_statuses=[202], + ) + assert rc is None + assert contree_client.request_count == 1 + req = contree_client.get_request(0) + assert req.method == "DELETE" + assert req.path == "/v1/operations/op-a" + assert "Cancelled operation op-a" in caplog.text + + def test_cancel_multiple_uuids(self, contree_client, caplog): + with caplog.at_level("INFO"): + rc = run_cancel( + contree_client, + uuids=["op-a", "op-b", "op-c"], + delete_statuses=[202, 202, 202], + ) + assert rc is None + assert contree_client.request_count == 3 + for i, uuid in enumerate(["op-a", "op-b", "op-c"]): + req = contree_client.get_request(i) + assert req.method == "DELETE" + assert req.path == f"/v1/operations/{uuid}" + + def test_cancel_continues_on_error(self, contree_client, caplog): + with caplog.at_level("INFO"): + rc = run_cancel( + contree_client, + uuids=["op-a", "op-b"], + delete_statuses=[409, 202], + ) + assert rc == 1 + assert "Failed to cancel op-a" in caplog.text + assert "Cancelled operation op-b" in caplog.text + + def test_cancel_requires_uuids_or_all(self, contree_client, caplog): + with caplog.at_level("ERROR"): + rc = run_cancel(contree_client) + assert rc == 1 + assert "Provide at least one UUID" in caplog.text + assert contree_client.request_count == 0 + + def test_cancel_all_iterates_active_statuses(self, contree_client, caplog): + # One op per active status, then DELETE for each + list_pages = [[{"uuid": f"{s.lower()}-0"}] for s in ACTIVE_STATUSES] + with caplog.at_level("INFO"): + rc = run_cancel( + contree_client, + all_flag=True, + list_pages=list_pages, + delete_statuses=[202] * len(ACTIVE_STATUSES), + ) + assert rc is None + # 3 GETs + 3 DELETEs (one per active status) + assert contree_client.request_count == 2 * len(ACTIVE_STATUSES) + for status in ACTIVE_STATUSES: + assert f"Cancelled operation {status.lower()}-0" in caplog.text + + def test_cancel_all_with_no_active(self, 
contree_client, caplog): + list_pages = [[] for _ in ACTIVE_STATUSES] + with caplog.at_level("INFO"): + rc = run_cancel( + contree_client, + all_flag=True, + list_pages=list_pages, + ) + assert rc is None + # Only GETs, no DELETEs + assert contree_client.request_count == len(ACTIVE_STATUSES) + assert "No active operations" in caplog.text + + def test_cancel_all_overrides_explicit_uuids(self, contree_client, caplog): + """--all wins; explicit UUIDs are ignored with a WARNING.""" + list_pages = [[{"uuid": "pending-0"}]] + [[] for _ in ACTIVE_STATUSES[1:]] + with caplog.at_level("WARNING"): + rc = run_cancel( + contree_client, + uuids=["ignored-1", "ignored-2"], + all_flag=True, + list_pages=list_pages, + delete_statuses=[202], + ) + assert rc is None + assert "--all overrides explicit UUIDs" in caplog.text + # Only one DELETE went out -- for pending-0, not the ignored UUIDs + deletes = [r for r in contree_client.fake.requests if r.method == "DELETE"] + assert len(deletes) == 1 + assert deletes[0].path == "/v1/operations/pending-0" diff --git a/tests/test_ps.py b/tests/test_ps.py index b838c6f..3a206e4 100644 --- a/tests/test_ps.py +++ b/tests/test_ps.py @@ -93,6 +93,76 @@ def test_table_output(self, contree_client, capsys): assert "UUID" in lines[0] +class TestPsDynamicFields: + """`emit_op` propagates every scalar field the API returns.""" + + def test_unknown_top_level_field_appears_in_row(self, contree_client, capsys): + """Server-side additions (e.g. 
``cost``) show up without code changes.""" + op = _make_op(0) + op["cost"] = 0.0042 + op["project_id"] = "proj-abc" + _run_cmd(contree_client, [op], formatter=JSONFormatter()) + parsed = json.loads(capsys.readouterr().out) + assert parsed["cost"] == 0.0042 + assert parsed["project_id"] == "proj-abc" + + def test_nested_dict_field_skipped(self, contree_client, capsys): + """Nested structures (metadata, result) are filtered out of the row.""" + op = _make_op(0) + op["metadata"] = {"big": "object"} + op["result"] = {"image": "img-1"} + _run_cmd(contree_client, [op], formatter=JSONFormatter()) + parsed = json.loads(capsys.readouterr().out) + assert "metadata" not in parsed + assert "result" not in parsed + assert parsed["uuid"] == "op-0" + + def test_nested_list_field_skipped(self, contree_client, capsys): + op = _make_op(0) + op["tags"] = ["a", "b", "c"] + _run_cmd(contree_client, [op], formatter=JSONFormatter()) + parsed = json.loads(capsys.readouterr().out) + assert "tags" not in parsed + + def test_new_datetime_field_parsed(self, contree_client, capsys): + """``finished_at``/``updated_at`` are auto-parsed like ``created_at``.""" + op = _make_op(0) + op["finished_at"] = "2025-06-01T01:00:00Z" + _run_cmd(contree_client, [op], formatter=JSONFormatter()) + parsed = json.loads(capsys.readouterr().out) + # JSONFormatter serialises datetimes via _json_default -> isoformat. + assert parsed["finished_at"].startswith("2025-06-01T01:00:00") + + def test_error_is_always_last_column(self, contree_client, capsys): + """``error`` is pinned to the trailing position regardless of API order.""" + # Build an op where `error` is *not* the last key in insertion order. 
+ op = { + "uuid": "op-0", + "error": "boom", + "status": "FAILED", + "kind": "instance", + "duration": 1.0, + "created_at": "2025-06-01T00:00:00Z", + } + _run_cmd(contree_client, [op], formatter=JSONFormatter()) + parsed = json.loads(capsys.readouterr().out) + keys = list(parsed.keys()) + assert keys[-1] == "error" + assert parsed["error"] == "boom" + + def test_error_last_even_when_added_field_present(self, contree_client, capsys): + """A new server field appears before ``error`` in the row.""" + op = _make_op(0, status="FAILED") + op["error"] = "oom" + op["cost"] = 0.01 # server field added after `error` in the response + _run_cmd(contree_client, [op], formatter=JSONFormatter()) + parsed = json.loads(capsys.readouterr().out) + keys = list(parsed.keys()) + assert keys[-1] == "error" + assert "cost" in keys + assert keys.index("cost") < keys.index("error") + + class TestPsParams: def test_status_param(self, contree_client): _run_cmd(contree_client, [], status="FAILED") diff --git a/tests/test_run.py b/tests/test_run.py index 7e5d3cb..720da2e 100644 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -49,12 +49,15 @@ def _op_response( duration: float | None = 2.0, error: str | None = None, image: str = IMG_NEW, + state_extra: dict | None = None, ) -> FakeResponse: - state = {} + state: dict = {} if exit_code is not None: state["exit_code"] = exit_code + if state_extra: + state.update(state_extra) instance_result: dict | None = None - if stdout is not None or stderr is not None or exit_code is not None: + if stdout is not None or stderr is not None or state: instance_result = { "stdout": stdout, "stderr": stderr, @@ -221,6 +224,61 @@ def test_failed_with_exit_code(self, contree_client, session_store): rc = _run_cmd(contree_client, args, responses, store=session_store) assert rc == 137 + def test_timed_out_logs_warning(self, contree_client, session_store, caplog): + """``state.timed_out=true`` triggers a WARNING regardless of status. 
+ + The API can report SUCCESS while still flagging the process as killed + by the user-set timeout (signal=9, exit_code=-1, timed_out=true). + """ + session_store.set_image(IMG_UUID, kind="test") + args = _default_args(timeout=60) + responses = [ + _spawn_response(), + _op_response( + status="SUCCESS", + exit_code=-1, + state_extra={"timed_out": True, "signal": 9}, + ), + ] + with caplog.at_level(logging.WARNING, logger="contree_cli.cli.run"): + _run_cmd(contree_client, args, responses, store=session_store) + + records = [r for r in caplog.records if "timed out" in r.getMessage()] + assert len(records) == 1 + assert records[0].levelno == logging.WARNING + assert "60s" in records[0].getMessage() + + def test_failed_without_timeout_still_fatal( + self, contree_client, session_store, caplog + ): + """A non-timeout FAILED keeps emitting at FATAL severity.""" + session_store.set_image(IMG_UUID, kind="test") + args = _default_args() + responses = [ + _spawn_response(), + _op_response(status="FAILED", exit_code=1, error="oom"), + ] + with caplog.at_level(logging.WARNING, logger="contree_cli.cli.run"): + _run_cmd(contree_client, args, responses, store=session_store) + + ended = [r for r in caplog.records if "ended with status" in r.getMessage()] + assert len(ended) == 1 + assert ended[0].levelno == logging.CRITICAL + + def test_success_without_timeout_logs_nothing( + self, contree_client, session_store, caplog + ): + """Plain SUCCESS does not emit a timeout warning.""" + session_store.set_image(IMG_UUID, kind="test") + args = _default_args() + responses = [ + _spawn_response(), + _op_response(status="SUCCESS", exit_code=0), + ] + with caplog.at_level(logging.WARNING, logger="contree_cli.cli.run"): + _run_cmd(contree_client, args, responses, store=session_store) + assert [r for r in caplog.records if "timed out" in r.getMessage()] == [] + def test_exit_code_propagated(self, contree_client, session_store): session_store.set_image(IMG_UUID, kind="test") args = 
_default_args() @@ -997,6 +1055,30 @@ def test_shell_does_not_overquote_simple_tokens( body = json.loads(spawn_req.body) assert body["command"] == "ls -la /etc" + def test_shell_passes_single_expression_verbatim( + self, contree_client, session_store + ): + """Single arg is treated as a pre-formed shell expression. + + `contree run -s -- 'echo 1 ; echo 2'` produces command_args with one + element. Wrapping it via shlex.join would quote the whole string and + sh -c would try to exec the literal as a command name. + """ + session_store.set_image(IMG_UUID, kind="test") + args = _default_args( + command_args=["echo 1 ; echo 2"], + shell=True, + ) + responses = [ + _spawn_response(), + _op_response(status="SUCCESS", exit_code=0), + ] + _run_cmd(contree_client, args, responses, store=session_store) + spawn_req = contree_client.get_request(0) + body = json.loads(spawn_req.body) + assert body["command"] == "echo 1 ; echo 2" + assert body["shell"] is True + # ── Session update on success ──────────────────────────────────────────── diff --git a/tests/test_session.py b/tests/test_session.py index b98698f..64b99e3 100644 --- a/tests/test_session.py +++ b/tests/test_session.py @@ -991,3 +991,23 @@ def test_depth_matches_entries(self, session_store: SessionStore): assert session_store.history_depth() == 2 session_store.set_image("img-3", kind="run") assert session_store.history_depth() == 3 + + +class TestConcurrentStores: + def test_two_stores_on_same_db_do_not_lock(self, tmp_path: Path): + # Two `contree shell` tabs share one per-profile SQLite file. + # WAL + busy_timeout must let interleaved writes succeed. 
+ db = tmp_path / "shared.db" + store_a = SessionStore(db, "sess-a") + store_b = SessionStore(db, "sess-b") + try: + for i in range(20): + store_a.set_image(f"img-a-{i}", kind="run") + store_b.set_image(f"img-b-{i}", kind="run") + store_a.cache[(f"img-a-{i}", "list:/etc")] = {"n": i} + store_b.cache[(f"img-b-{i}", "list:/etc")] = {"n": i} + assert store_a.history_depth() == 20 + assert store_b.history_depth() == 20 + finally: + store_a.close() + store_b.close() diff --git a/tests/test_shell_completer.py b/tests/test_shell_completer.py index a6e4b45..a0ce6ff 100644 --- a/tests/test_shell_completer.py +++ b/tests/test_shell_completer.py @@ -591,7 +591,7 @@ def test_root_slash_does_not_produce_double_slash(self, image_cache): files = [_make_file("/etc", is_dir=True)] completer, _client = _path_completer(files, image_cache, cwd="/") - with patch.object(completer, "_list_dir", wraps=completer._list_dir) as spy: + with patch.object(completer, "list_dir", wraps=completer.list_dir) as spy: _complete_line(completer, "/", "ls /", begidx=3) spy.assert_called_once() queried_path = spy.call_args[0][1] @@ -602,7 +602,7 @@ def test_dotdot_normalized_in_completion(self, image_cache): files = [_make_file("/etc/hosts")] completer, _client = _path_completer(files, image_cache, cwd="/tmp") - with patch.object(completer, "_list_dir", wraps=completer._list_dir) as spy: + with patch.object(completer, "list_dir", wraps=completer.list_dir) as spy: results = _complete_line( completer, "../etc/", @@ -1154,3 +1154,280 @@ def test_contree_format_flag_completes(self): assert "table" in names assert "tsv" in names assert "json" not in names + + +class TestArgparseDrivenCompletion: + """New argparse-walker dispatch covers nested subcommand positionals.""" + + def test_session_delete_completes_session_keys(self, image_cache): + sessions = [ + Session( + session_key="alpha_keep", + active_branch="main", + current_image="img-1", + last_kind="run", + last_title="t", + updated_at="2025-01-01", + ), + 
Session( + session_key="beta_drop", + active_branch="main", + current_image="img-2", + last_kind="run", + last_title="t", + updated_at="2025-01-01", + ), + ] + completer = _session_completer(image_cache, sessions=sessions) + results = _complete_line( + completer, + "", + "contree session delete ", + begidx=23, + ) + assert "alpha_keep " in results + assert "beta_drop " in results + + def test_session_show_completes_session_keys(self, image_cache): + sessions = [ + Session( + session_key="alpha_one", + active_branch="main", + current_image="img-1", + last_kind="run", + last_title="t", + updated_at="2025-01-01", + ), + ] + completer = _session_completer(image_cache, sessions=sessions) + results = _complete_line( + completer, + "", + "contree session show ", + begidx=21, + ) + assert "alpha_one " in results + + def test_run_use_flag_value_completes_image(self, image_cache): + images = [_make_image("img-1", tag="ubuntu:noble")] + completer, _client = _image_completer(images, image_cache) + results = _complete_line( + completer, + "", + "contree run --use ", + begidx=18, + ) + assert "tag:ubuntu:noble " in results + assert "img-1 " in results + + def test_run_equals_form_use_flag(self, image_cache): + images = [_make_image("img-1", tag="ubuntu:noble")] + completer, _client = _image_completer(images, image_cache) + results = _complete_line( + completer, + "--use=tag:ub", + "contree run --use=tag:ub", + begidx=12, + ) + assert "tag:ubuntu:noble " in results + + def test_run_remainder_swallows_flags(self): + completer = _make_completer() + results = _complete_line( + completer, + "--", + "contree run -- ./script --", + begidx=24, + ) + assert results == [] + + def test_run_remainder_completes_sandbox_path(self, image_cache): + files = [_make_file("/etc/hosts")] + completer, _client = _path_completer(files, image_cache) + results = _complete_line( + completer, + "/etc/", + "contree run -- /etc/", + begidx=15, + ) + assert "/etc/hosts " in results + + def 
test_help_topic_completes_run(self): + completer = _make_completer() + results = _complete_line(completer, "ru", "help ru", begidx=5) + assert "run " in results + + def test_session_branch_from_completes(self, image_cache): + branches = [("main", True), ("feature", False)] + completer = _session_completer(image_cache, branches=branches) + results = _complete_line( + completer, + "f", + "contree session branch new --from f", + begidx=34, + ) + assert "feature " in results + + def test_run_cwd_flag_dirs_only(self, image_cache): + files = [ + _make_file("/etc/conf.d", is_dir=True), + _make_file("/etc/hosts"), + ] + completer, _client = _path_completer(files, image_cache) + results = _complete_line( + completer, + "/etc/", + "contree run --cwd /etc/", + begidx=18, + ) + assert "/etc/conf.d/" in results + assert "/etc/hosts " not in results + + def test_help_value_excludes_after_help_flag(self): + completer = _make_completer() + results = _complete_line( + completer, + "", + "contree run --help ", + begidx=19, + ) + # After --help, argparse stops. We return nothing. + assert results == [] + + def test_choices_auto_bind_via_format_flag(self): + # Auto-bind for actions with `choices=` works through the argparse + # walker (the trie path also handles --format; this exercises the + # walker via the contree-prefixed form). 
+ completer = _make_completer() + results = _complete_line( + completer, + "", + "contree -f ", + begidx=11, + ) + names = [r.rstrip(" ") for r in results] + assert "json" in names + assert "table" in names + + +class TestMappedFileCompletion: + """`--file ./host:/inst` whole-token replacement completion.""" + + def test_initial_host_path(self, tmp_path, image_cache): + f = tmp_path / "Makefile" + f.write_text("") + files = [_make_file("/etc/hosts")] + completer, _client = _path_completer(files, image_cache) + results = _complete_line( + completer, + f"{tmp_path}/Make", + f"contree run --file {tmp_path}/Make", + begidx=19, + ) + assert any(r.endswith("/Makefile") for r in results) + + def test_after_first_colon_offers_tags(self, image_cache): + files = [_make_file("/etc/hosts")] + completer, _client = _path_completer(files, image_cache) + results = _complete_line( + completer, + "./Makefile:", + "contree run --file ./Makefile:", + begidx=19, + ) + # Empty tail after colon offers u/g/m + / + assert "./Makefile:u" in results + assert "./Makefile:g" in results + assert "./Makefile:m" in results + assert "./Makefile:/" in results + + def test_after_colon_with_slash_completes_sandbox_path(self, image_cache): + files = [_make_file("/etc/hosts")] + completer, _client = _path_completer(files, image_cache) + results = _complete_line( + completer, + "./Makefile:/etc/", + "contree run --file ./Makefile:/etc/", + begidx=19, + ) + assert "./Makefile:/etc/hosts" in results + + +class TestProfileNamespacing: + """Cache keys are namespaced by active profile name.""" + + def test_image_cache_per_profile(self, image_cache): + from contree_cli.shell.cache import SourceCache + + cache_a = SourceCache(image_cache, "alice") + cache_b = SourceCache(image_cache, "bob") + cache_a.set(scope="", kind="images", value=[{"uuid": "x"}]) + assert cache_a.get(scope="", kind="images", ttl=60.0) == [{"uuid": "x"}] + assert cache_b.get(scope="", kind="images", ttl=60.0) is None + + def 
test_invalidate_all_only_drops_active_profile(self, image_cache): + from contree_cli.shell.cache import SourceCache + + cache_a = SourceCache(image_cache, "alice") + cache_b = SourceCache(image_cache, "bob") + cache_a.set(scope="", kind="images", value=[1]) + cache_b.set(scope="", kind="images", value=[2]) + cache_a.invalidate_all() + assert cache_a.get(scope="", kind="images", ttl=60.0) is None + assert cache_b.get(scope="", kind="images", ttl=60.0) == [2] + + +class TestEnvKeySource: + """`env -d ` completes session env keys.""" + + def test_env_d_completes_existing_keys(self, image_cache): + store = MagicMock() + store.cache = image_cache + store.get_env.return_value = {"PATH": "/usr/bin", "DEBUG": "1"} + completer = _make_completer(client=MagicMock(), store=store) + results = _complete_line( + completer, + "", + "contree env -d ", + begidx=15, + ) + assert "PATH " in results + assert "DEBUG " in results + + def test_env_no_store_returns_empty(self): + completer = _make_completer(client=MagicMock(), store=None) + results = _complete_line( + completer, + "", + "contree env -d ", + begidx=15, + ) + assert results == [] + + +class TestSkillSpecSource: + """`skill remove ` completes spec prefixes and host paths.""" + + def test_skill_remove_offers_prefixes(self): + completer = _make_completer() + results = _complete_line( + completer, + "", + "contree skill remove ", + begidx=21, + ) + assert "claude:" in results + assert "codex:~" in results + assert "claude:~" in results + + def test_skill_remove_filters_by_prefix(self): + completer = _make_completer() + results = _complete_line( + completer, + "claude", + "contree skill remove claude", + begidx=21, + ) + assert "claude:" in results + assert "claude:~" in results + assert "codex:~" not in results diff --git a/tests/test_shell_repl.py b/tests/test_shell_repl.py index 3d06acb..7cb1835 100644 --- a/tests/test_shell_repl.py +++ b/tests/test_shell_repl.py @@ -19,7 +19,7 @@ ) from contree_cli.shell.completer import 
ShellCompleter from contree_cli.shell.parser import build_shell_parser -from contree_cli.shell.repl import ContreeShell +from contree_cli.shell.repl import ContreeShell, intercept_timeout, parse_duration def _make_shell() -> ContreeShell: @@ -230,7 +230,7 @@ def test_cat_no_args_falls_back_to_run(self): FORMATTER.set(formatter) with patch.object(shell, "dispatch_run") as mock_run: shell.execute("cat") - mock_run.assert_called_once_with(["cat"]) + mock_run.assert_called_once_with("cat") def test_cat_with_flags_falls_back_to_run(self): """'cat -n /etc/hosts' should fall back to implicit run.""" @@ -239,7 +239,7 @@ def test_cat_with_flags_falls_back_to_run(self): FORMATTER.set(formatter) with patch.object(shell, "dispatch_run") as mock_run: shell.execute("cat -n /etc/hosts") - mock_run.assert_called_once_with(["cat", "-n", "/etc/hosts"]) + mock_run.assert_called_once_with("cat -n /etc/hosts") def test_cat_with_glob_falls_back_to_run(self): """'cat *.py' should fall back to implicit run.""" @@ -248,7 +248,7 @@ def test_cat_with_glob_falls_back_to_run(self): FORMATTER.set(formatter) with patch.object(shell, "dispatch_run") as mock_run: shell.execute("cat *.py") - mock_run.assert_called_once_with(["cat", "*.py"]) + mock_run.assert_called_once_with("cat *.py") def test_cat_multiple_args_falls_back_to_run(self): """'cat a b' should fall back to implicit run.""" @@ -257,7 +257,7 @@ def test_cat_multiple_args_falls_back_to_run(self): FORMATTER.set(formatter) with patch.object(shell, "dispatch_run") as mock_run: shell.execute("cat a b") - mock_run.assert_called_once_with(["cat", "a", "b"]) + mock_run.assert_called_once_with("cat a b") def test_ls_with_flags_falls_back_to_run(self): """'ls -la' should fall back to implicit run.""" @@ -266,7 +266,7 @@ def test_ls_with_flags_falls_back_to_run(self): FORMATTER.set(formatter) with patch.object(shell, "dispatch_run") as mock_run: shell.execute("ls -la") - mock_run.assert_called_once_with(["ls", "-la"]) + 
mock_run.assert_called_once_with("ls -la") def test_ls_multiple_args_falls_back_to_run(self): """'ls /etc /tmp' should fall back to implicit run.""" @@ -275,7 +275,7 @@ def test_ls_multiple_args_falls_back_to_run(self): FORMATTER.set(formatter) with patch.object(shell, "dispatch_run") as mock_run: shell.execute("ls /etc /tmp") - mock_run.assert_called_once_with(["ls", "/etc", "/tmp"]) + mock_run.assert_called_once_with("ls /etc /tmp") # -- Pending files force run ----------------------------------------------- @@ -288,7 +288,7 @@ def test_cat_with_pending_files_falls_back_to_run(self): patch.object(shell, "dispatch_run") as mock_run, ): shell.execute("cat /etc/hosts") - mock_run.assert_called_once_with(["cat", "/etc/hosts"]) + mock_run.assert_called_once_with("cat /etc/hosts") def test_ls_with_pending_files_falls_back_to_run(self): """'ls /etc' with pending files should use run.""" @@ -299,7 +299,7 @@ def test_ls_with_pending_files_falls_back_to_run(self): patch.object(shell, "dispatch_run") as mock_run, ): shell.execute("ls /etc") - mock_run.assert_called_once_with(["ls", "/etc"]) + mock_run.assert_called_once_with("ls /etc") def test_cat_without_pending_files_dispatches_contree(self): """'cat /etc/hosts' without pending files uses contree dispatch.""" @@ -453,19 +453,19 @@ def test_bare_command_dispatches_run(self): shell = _make_shell() with patch.object(shell, "dispatch_run") as mock: shell.execute("echo hello") - mock.assert_called_once_with(["echo", "hello"]) + mock.assert_called_once_with("echo hello") def test_dispatch_run_constructs_run_args(self): - """dispatch_run should construct RunArgs with shell=True.""" + """dispatch_run wraps the raw line as a single shell-expression arg.""" shell = _make_shell() formatter = DefaultFormatter() FORMATTER.set(formatter) with _mock_session(), patch("contree_cli.cli.run.cmd_run") as mock_cmd: - shell.dispatch_run(["grep", "-r", "root", "/etc"]) + shell.dispatch_run("grep -r root /etc") args = mock_cmd.call_args[0][0] - 
assert args.command_args == ["grep", "-r", "root", "/etc"] + assert args.command_args == ["grep -r root /etc"] assert args.shell is True def test_bare_command_api_error_caught(self, capsys): @@ -478,7 +478,7 @@ def test_bare_command_api_error_caught(self, capsys): _mock_session(), patch("contree_cli.cli.run.cmd_run", side_effect=ApiError(500, "err", "")), ): - shell.dispatch_run(["echo", "hello"]) + shell.dispatch_run("echo hello") err = capsys.readouterr().err assert "API error" in err @@ -493,7 +493,7 @@ def test_bare_command_keyboard_interrupt_caught(self): _mock_session(), patch("contree_cli.cli.run.cmd_run", side_effect=KeyboardInterrupt), ): - shell.dispatch_run(["sleep", "100"]) + shell.dispatch_run("sleep 100") # Should not raise def test_bare_command_system_exit_caught(self): @@ -506,22 +506,147 @@ def test_bare_command_system_exit_caught(self): _mock_session(), patch("contree_cli.cli.run.cmd_run", side_effect=SystemExit(1)), ): - shell.dispatch_run(["false"]) + shell.dispatch_run("false") # Should not raise def test_pipe_expression(self): - """A pipe expression should be passed as-is with shell=True.""" + """Shell metacharacters like '|' are preserved (not quoted as literals). + + Regression: previously the shell REPL would re-join tokens via + ``shlex.join`` which wrapped operators in quotes, so the pipe was + passed to ``mount`` as a literal arg instead of being interpreted by + the remote shell. 
+ """ + shell = _make_shell() + formatter = DefaultFormatter() + FORMATTER.set(formatter) + + with _mock_session(), patch("contree_cli.cli.run.cmd_run") as mock_cmd: + shell.execute("mount | grep cg") + + args = mock_cmd.call_args[0][0] + assert args.command_args == ["mount | grep cg"] + assert args.shell is True + + def test_redirection_expression(self): + """A redirection expression should be passed verbatim to sh -c.""" + shell = _make_shell() + formatter = DefaultFormatter() + FORMATTER.set(formatter) + + with _mock_session(), patch("contree_cli.cli.run.cmd_run") as mock_cmd: + shell.execute("echo hi > /tmp/out") + + args = mock_cmd.call_args[0][0] + assert args.command_args == ["echo hi > /tmp/out"] + + def test_semicolon_chain(self): + """A ';' command chain should reach sh -c unquoted.""" + shell = _make_shell() + formatter = DefaultFormatter() + FORMATTER.set(formatter) + + with _mock_session(), patch("contree_cli.cli.run.cmd_run") as mock_cmd: + shell.execute("echo 1 ; echo 2") + + args = mock_cmd.call_args[0][0] + assert args.command_args == ["echo 1 ; echo 2"] + + +class TestTimeoutBuiltin: + """``timeout DURATION CMD...`` sets the API operation timeout.""" + + @pytest.mark.parametrize( + "spec,expected", + [ + ("60", 60), + ("30s", 30), + ("5m", 300), + ("1h", 3600), + ("1d", 86400), + ("1.5m", 90), + ], + ) + def test_parse_duration_valid(self, spec, expected): + assert parse_duration(spec) == expected + + @pytest.mark.parametrize( + "spec", + ["", "abc", "0", "0s", "-5", "5x", "1.2.3", "5 m"], + ) + def test_parse_duration_invalid(self, spec): + assert parse_duration(spec) is None + + def test_intercept_basic(self): + result = intercept_timeout("timeout 60 apk add docker") + assert result == (60, "apk add docker") + + def test_intercept_suffix(self): + result = intercept_timeout("timeout 5m make build") + assert result == (300, "make build") + + def test_intercept_preserves_internal_quoting(self): + """Remainder keeps original quoting so sh -c parses 
it identically.""" + result = intercept_timeout('timeout 30 sh -c "echo 1 ; echo 2"') + assert result == (30, 'sh -c "echo 1 ; echo 2"') + + def test_intercept_rejects_unparseable_duration(self): + """Falls through when the duration is not a valid spec.""" + assert intercept_timeout("timeout --help") is None + assert intercept_timeout("timeout -k 5 30 cmd") is None + assert intercept_timeout("timeout abc cmd") is None + + def test_intercept_requires_command(self): + """No command after the duration → no interception.""" + assert intercept_timeout("timeout 60") is None + + def test_intercept_only_for_first_token(self): + """``timeout`` mid-line must not be intercepted.""" + assert intercept_timeout("echo timeout 60 hi") is None + + def test_execute_sets_operation_timeout(self): + """``timeout 90 long-build`` sets payload timeout to 90s.""" shell = _make_shell() formatter = DefaultFormatter() FORMATTER.set(formatter) with _mock_session(), patch("contree_cli.cli.run.cmd_run") as mock_cmd: - shell.dispatch_run(["ls", "/etc", "|", "head"]) + shell.execute("timeout 90 long-build") args = mock_cmd.call_args[0][0] - assert args.command_args == ["ls", "/etc", "|", "head"] + assert args.timeout == 90 + assert args.command_args == ["long-build"] assert args.shell is True + def test_execute_suffix_units(self): + shell = _make_shell() + formatter = DefaultFormatter() + FORMATTER.set(formatter) + + with _mock_session(), patch("contree_cli.cli.run.cmd_run") as mock_cmd: + shell.execute("timeout 2m apk add docker") + + args = mock_cmd.call_args[0][0] + assert args.timeout == 120 + assert args.command_args == ["apk add docker"] + + def test_execute_falls_through_when_unparseable(self): + """Unrecognised duration → run ``timeout`` as a normal shell command. + + Lets the user reach the in-image ``timeout`` binary for advanced + usages (e.g. ``--kill-after``, signal flags) that we don't translate. 
+ """ + shell = _make_shell() + formatter = DefaultFormatter() + FORMATTER.set(formatter) + + with _mock_session(), patch("contree_cli.cli.run.cmd_run") as mock_cmd: + shell.execute("timeout --help") + + args = mock_cmd.call_args[0][0] + assert args.timeout is None + assert args.command_args == ["timeout --help"] + class TestRun: def test_eof_exits_cleanly(self): @@ -898,7 +1023,7 @@ def test_general_exception_logged(self, capsys): "contree_cli.cli.run.cmd_run", side_effect=RuntimeError("unexpected") ), ): - shell.dispatch_run(["echo", "hello"]) + shell.dispatch_run("echo hello") # Should not raise -- error is logged def test_dispatch_contree_general_exception_logged(self, capsys): From cc3f59d6ec1ebe3350a53260e97a779cda29e239 Mon Sep 17 00:00:00 2001 From: Dmitry Orlov Date: Tue, 12 May 2026 21:10:50 +0200 Subject: [PATCH 2/9] add docs --- contree_cli/agent.md | 25 ++++++++++++++++++ contree_cli/cli/images.py | 16 +++++++----- contree_cli/cli/ps.py | 6 ----- contree_cli/skill_body.md | 53 ++++++++++++++++++++++++++++++++++++--- pyproject.toml | 2 +- tests/test_images.py | 38 +++++++++++++++++++++++++--- uv.lock | 2 +- 7 files changed, 121 insertions(+), 21 deletions(-) diff --git a/contree_cli/agent.md b/contree_cli/agent.md index ae543bf..1fecede 100644 --- a/contree_cli/agent.md +++ b/contree_cli/agent.md @@ -211,10 +211,31 @@ Piped stdin: Detached mode (-d): contree run -d -- long-running-task contree ps check status + contree ps -a -S FAILED --since=1h recent failures contree show UUID view result contree session wait block until done contree session wait UUID1 UUID2 wait for specific ops +Monitoring background operations: + Use the `operation` namespace (alias `op`) when juggling several + detached runs. `op ls` is `ps`; `op show` and `op cancel` accept + multiple UUIDs in one call. 
+ + contree op ls list operations (= ps) + contree op ls -a -S EXECUTING filter active runs + contree op show UUID1 UUID2 UUID3 inspect a batch in one call + contree op cancel UUID1 UUID2 cancel selected operations + contree op cancel --all cancel every active op (rare) + + Fan-out + join pattern: + A=$(contree run -d -- make -C /work/a build | jq -r .uuid) + B=$(contree run -d -- make -C /work/b build | jq -r .uuid) + contree session wait "$A" "$B" + contree op show "$A" "$B" + + Background checks are cheap: terminal results are cached locally, + so repeated `op show` / `show` calls do not re-hit the API. + Disposable mode (-D) — no image checkpoint: contree run -D -- rm -rf /tmp/* contree run -D -- cat /etc/passwd @@ -357,6 +378,10 @@ All commands ps List operations kill UUID Cancel operation show UUID Show operation result + operation list List operations (aliases: op ls) + operation show UUID... Show one or more operation results (aliases: op) + operation cancel UUID... + Cancel one or more operations (or --all) ls [PATH] List files in image (no VM) cat PATH Show file content (no VM) cp PATH DEST Download file from image diff --git a/contree_cli/cli/images.py b/contree_cli/cli/images.py index 6bb4947..6e9896a 100644 --- a/contree_cli/cli/images.py +++ b/contree_cli/cli/images.py @@ -287,12 +287,16 @@ def cmd_images(args: ImagesArgs) -> None: if not images: return for image in images: - created_at = parse_datetime(image["created_at"]) - formatter( - uuid=image["uuid"], - created_at=created_at, - tag=image.get("tag") or "", - ) + row: dict[str, object] = {} + for key, value in image.items(): + if isinstance(value, (dict, list)): + continue + if key == "created_at" and isinstance(value, str): + value = parse_datetime(value) + if key == "tag" and value is None: + value = "" + row[key] = value + formatter(**row) emitted += len(images) if len(images) < page_size: return diff --git a/contree_cli/cli/ps.py b/contree_cli/cli/ps.py index 90fbc67..76a54ba 100644 --- 
a/contree_cli/cli/ps.py +++ b/contree_cli/cli/ps.py @@ -137,12 +137,6 @@ def emit_op(formatter: OutputFormatter, op: dict[str, Any], *, quiet: bool) -> N if quiet: print(op["uuid"]) return - # Take every scalar top-level field from the API response so new server - # fields show up automatically. Nested structures (metadata, result) are - # skipped to keep the table readable -- use `show UUID` for the detail - # view that drills into them. ``error`` is pinned to the last column - # because it can be a long free-form message and trailing it keeps the - # rest of the row aligned. row = { key: transform_field(key, value) for key, value in op.items() diff --git a/contree_cli/skill_body.md b/contree_cli/skill_body.md index 13dddb0..0cdcc49 100644 --- a/contree_cli/skill_body.md +++ b/contree_cli/skill_body.md @@ -168,7 +168,13 @@ Unsure about sessions? Run `contree session --help` or `contree agent sessions` - `session checkout`: switch active branch. - `session rollback`: move the active branch pointer backward. - `session wait`: wait for active operations, or specific operation UUIDs. -- `ps` / `show` / `kill`: inspect, read, or cancel operations. +- `ps` / `show` / `kill`: list operations, read one result, or cancel one operation. +- `operation` (alias `op`): grouped namespace for the same actions plus + multi-UUID variants. Use this when monitoring background work. + - `op ls` -- same flags as `ps`, lists operations. Pass `-q` to print UUIDs only. + - `op show UUID1 UUID2 ...` -- fetch several operation results in one call. + - `op cancel UUID1 UUID2 ...` -- cancel several operations, or `--all` + to cancel every active one. ## Execution patterns @@ -292,10 +298,49 @@ Use staged files when several edits should land together on the next run. Use `- ## Detached operations +Use detached runs whenever a step is slow (large image imports, builds, +test suites). The CLI returns immediately with an operation UUID; +monitoring is then a polling problem rather than a blocking one. 
+ - Start long work detached: `contree -S run -d -- long-job` -- Inspect running operations with `contree ps` -- Read results with `contree show ` -- Use `contree session wait [OP_ID ...]` when available to wait for active or specific operations. +- Fan out several jobs in parallel: each `run -d` returns its own UUID. + +Monitoring background operations: + +- `contree ps` -- active operations (PENDING, ASSIGNED, EXECUTING). +- `contree ps -a` -- include completed/failed/cancelled. +- `contree ps -q` -- UUIDs only, pipe-friendly. +- `contree op ls` -- alias for `ps`, identical flags. +- `contree show UUID` -- single-operation detail (status, duration, + exit code, stdout/stderr, resulting image). +- `contree op show UUID1 UUID2 UUID3` -- fetch several operations in + one shot. Convenient when fanning out runs and checking the batch. +- `contree session wait` -- block until all active ops of the current + session reach terminal state. +- `contree session wait UUID1 UUID2` -- block on specific UUIDs. + +Cancelling: + +- `contree kill UUID` -- single operation. +- `contree op cancel UUID1 UUID2` -- batch of UUIDs. +- `contree op cancel --all` -- every active operation (use sparingly). 
+ +Common patterns: + +```bash +# Fan out: start three builds, wait for all, inspect each +A=$(contree run -d -- make -C /work/a build | jq -r .uuid) +B=$(contree run -d -- make -C /work/b build | jq -r .uuid) +C=$(contree run -d -- make -C /work/c build | jq -r .uuid) +contree session wait "$A" "$B" "$C" +contree op show "$A" "$B" "$C" + +# Snapshot what is running right now +contree -f json op ls | jq '.uuid' + +# Find recent failures across the project +contree -f json ps -a -S FAILED --since=1h +``` ## Output and automation diff --git a/pyproject.toml b/pyproject.toml index bb60ade..dcdc1c2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "contree-cli" -version = "0.4.4" +version = "0.6.0" description = "CLI client for the ConTree sandbox platform" readme = "README.md" requires-python = ">=3.10" diff --git a/tests/test_images.py b/tests/test_images.py index 896fcac..4972be0 100644 --- a/tests/test_images.py +++ b/tests/test_images.py @@ -68,12 +68,12 @@ def test_null_tag_shown_as_empty(self, contree_client, capsys): ] _run_cmd(contree_client, images) out = capsys.readouterr().out - # tag is empty string, so the row ends with a trailing comma-separator lines = out.splitlines() data_line = lines[1] assert "ccc" in data_line - # empty tag means two consecutive delimiters or trailing delimiter - assert data_line.endswith(",") + # null tag renders as empty in CSV: two consecutive delimiters + # around its position. + assert ",," in data_line def test_empty_list(self, contree_client, capsys): _run_cmd(contree_client, []) @@ -101,6 +101,38 @@ def test_table_output(self, contree_client, capsys): assert len(lines) == 3 # header + 2 rows assert "UUID" in lines[0] + def test_unknown_field_passes_through(self, contree_client, capsys): + """New server fields (e.g. 
``size``, ``digest``) reach the row as-is.""" + images = [ + { + "uuid": "ggg", + "tag": "v4", + "created_at": "2025-06-01T00:00:00Z", + "size": 12345, + "digest": "sha256:abcd", + }, + ] + _run_cmd(contree_client, images, formatter=JSONFormatter()) + parsed = json.loads(capsys.readouterr().out.strip()) + assert parsed["size"] == 12345 + assert parsed["digest"] == "sha256:abcd" + + def test_nested_fields_skipped(self, contree_client, capsys): + images = [ + { + "uuid": "hhh", + "tag": "v5", + "created_at": "2025-06-01T00:00:00Z", + "metadata": {"foo": "bar"}, + "tags": ["a", "b"], + }, + ] + _run_cmd(contree_client, images, formatter=JSONFormatter()) + parsed = json.loads(capsys.readouterr().out.strip()) + assert "metadata" not in parsed + assert "tags" not in parsed + assert parsed["uuid"] == "hhh" + class TestImagesParams: def test_uuid_param(self, contree_client): diff --git a/uv.lock b/uv.lock index 628a700..4c6498d 100644 --- a/uv.lock +++ b/uv.lock @@ -159,7 +159,7 @@ wheels = [ [[package]] name = "contree-cli" -version = "0.4.4" +version = "0.6.0" source = { editable = "." 
} [package.dev-dependencies] From 71ecfb6ab142c2c587459e29284ad3dd4abd3a30 Mon Sep 17 00:00:00 2001 From: Dmitry Orlov Date: Tue, 12 May 2026 22:19:52 +0200 Subject: [PATCH 3/9] add history search --- contree_cli/manual.md | 3 +- contree_cli/shell/repl.py | 168 ++++++++++++++++++++++++++------------ docs/commands/shell.md | 17 +++- docs/tutorial/shell.md | 11 ++- tests/test_shell_repl.py | 127 +++++++++++++++++++++------- 5 files changed, 241 insertions(+), 85 deletions(-) diff --git a/contree_cli/manual.md b/contree_cli/manual.md index 94cc0fe..7584ad6 100644 --- a/contree_cli/manual.md +++ b/contree_cli/manual.md @@ -62,10 +62,11 @@ Interactive shell Inside the shell: - Bare commands run in the sandbox (implicit contree run) - - ls / cat use the API — no VM spawned + - ls / cat use the API, no VM spawned - vim / nano open contree file edit with your host editor - cd changes session working directory - Tab completes commands, paths, images, branches + - 'history [SEARCH]' filters persisted history by substring - Use 'contree run' prefix for flags like -D, -e, --file More: contree shell --help diff --git a/contree_cli/shell/repl.py b/contree_cli/shell/repl.py index c6ee81b..e892d19 100644 --- a/contree_cli/shell/repl.py +++ b/contree_cli/shell/repl.py @@ -2,11 +2,13 @@ from __future__ import annotations +import argparse import contextlib import logging import re import shlex import sys +from dataclasses import dataclass from functools import cached_property from contree_cli import FORMATTER, IN_SHELL, PROFILE, SESSION_STORE, ArgumentsProtocol @@ -20,8 +22,6 @@ log = logging.getLogger(__name__) -_PROMPT_BASE = "contree" - # Regex matching ANSI escape sequences (CSI and OSC). ANSI_RE = re.compile(r"(\033\[[0-9;]*m)") @@ -39,7 +39,7 @@ LIBEDIT = False -def _readline_safe_prompt(prompt: str) -> str: +def readline_safe_prompt(prompt: str) -> str: """Make *prompt* safe for readline cursor-position tracking. 
GNU readline recognises ``\\x01``/``\\x02`` markers around invisible @@ -99,14 +99,80 @@ def intercept_timeout(line: str) -> tuple[int, str] | None: EDITOR_ALIASES = frozenset({"vim", "vi", "nvim", "nano"}) # Aliases for ``help `` lookup. -_HELP_ALIASES: dict[str, str] = { +HELP_ALIASES: dict[str, str] = { "quit": "exit", "-f": "--format", "vi": "vim", } + +HISTORY_PARSER = ShellArgumentParser( + prog="history", + description=( + "Show command history for the current session, " + "optionally filtered by a case-insensitive substring." + ), +) +HISTORY_PARSER.add_argument( + "search", + nargs="?", + default=None, + metavar="PATTERN", + help="Case-insensitive substring; only matching lines are shown", +) + + +HELP_PARSER = ShellArgumentParser( + prog="help", + description=( + "Show general shell help, or detailed help for a builtin, " + "alias, or contree command." + ), +) +HELP_PARSER.add_argument( + "topic", + nargs="?", + default=None, + metavar="TOPIC", + help="Builtin, alias, or contree command name", +) + + +@dataclass(frozen=True) +class HistoryArgs: + search: str | None + + +@dataclass(frozen=True) +class HelpArgs: + topic: str | None + + +def get_help(parser: ShellArgumentParser) -> str: + """Return *parser*'s formatted help text (trailing newline stripped).""" + return parser.format_help().rstrip() + + +def parse_builtin( + parser: ShellArgumentParser, + tokens: list[str], +) -> argparse.Namespace | None: + """Run *parser* on *tokens*. Return ``None`` when callers should stop. + + ``ShellParseError`` with ``status == 0`` means argparse already + printed ``--help`` output to stdout; callers stop. A non-zero status + means an actual parse error: the message is forwarded to stderr. + """ + try: + return parser.parse_args(tokens) + except ShellParseError as exc: + if exc.status != 0 and exc.message: + print(f"{parser.prog}: {exc.message}", file=sys.stderr) + return None + + # Per-builtin help text shown by ``help ``. 
-_BUILTIN_HELP: dict[str, str] = { +BUILTIN_HELP: dict[str, str] = { "cd": ( "Usage: cd [PATH]\n" "\n" @@ -117,20 +183,8 @@ def intercept_timeout(line: str) -> tuple[int, str] | None: " cd reset to sandbox default" ), "pwd": "Usage: pwd\n\nPrint the current working directory.", - "history": ( - "Usage: history [N]\n" - "\n" - "Show command history for the current session.\n" - "Optional argument N limits output to the last N entries." - ), - "help": ( - "Usage: help [TOPIC]\n" - "\n" - "Show general shell help, or help for a specific command.\n" - " help general shell help\n" - " help cd help for the cd builtin\n" - " help run help for the run command" - ), + "history": get_help(HISTORY_PARSER), + "help": get_help(HELP_PARSER), "clear": "Usage: clear\n\nClear the terminal screen.", "timeout": ( "Usage: timeout DURATION COMMAND...\n" @@ -201,9 +255,9 @@ def __init__( parser: ShellArgumentParser, completer: ShellCompleter, ) -> None: - self._parser = parser - self._completer = completer - self.__prev_cwd = "/" + self.parser = parser + self.completer = completer + self.prev_cwd = "/" @property def cwd(self) -> str: @@ -211,13 +265,9 @@ def cwd(self) -> str: @cwd.setter def cwd(self, value: str) -> None: - self.__prev_cwd = self.cwd + self.prev_cwd = self.cwd self.session_store.set_cwd(value) - @property - def prev_cwd(self) -> str: - return self.__prev_cwd - @cached_property def session_store(self) -> SessionStore: store = SESSION_STORE.get() @@ -251,8 +301,8 @@ def print_status_line(self) -> None: print(line, file=sys.stderr) @property - def _prompt(self) -> str: - """Short input prompt — only cwd and branch, no ANSI length issues.""" + def prompt(self) -> str: + """Short input prompt: cwd and branch on the active line.""" branch = "" try: session = self.session_store.session @@ -268,7 +318,7 @@ def run(self) -> None: token = IN_SHELL.set(True) if READLINE_AVAILABLE: - readline.set_completer(self._completer.complete) + readline.set_completer(self.completer.complete) 
readline.set_completer_delims(" \t\n") print("contree interactive shell (type 'help' for commands, Ctrl-D to exit)") @@ -276,7 +326,7 @@ def run(self) -> None: while True: try: self.print_status_line() - line = input(_readline_safe_prompt(self._prompt)) + line = input(readline_safe_prompt(self.prompt)) except KeyboardInterrupt: # Ctrl-C on empty prompt — print newline, continue print() @@ -285,7 +335,7 @@ def run(self) -> None: if not line: continue # Handle line continuation (trailing \ or unclosed quotes) - line = self._read_continuation(line) + line = self.read_continuation(line) if not line: continue self.execute(line) @@ -296,7 +346,7 @@ def run(self) -> None: IN_SHELL.reset(token) @staticmethod - def _read_continuation(line: str) -> str: + def read_continuation(line: str) -> str: """Prompt for continuation lines when input is incomplete. Handles trailing backslash (line continuation) and unclosed @@ -315,7 +365,7 @@ def _read_continuation(line: str) -> str: except ValueError: pass try: - continuation = input(_readline_safe_prompt("> ")) + continuation = input(readline_safe_prompt("> ")) except KeyboardInterrupt: print() return "" @@ -430,7 +480,7 @@ def invalidate_completion_cache( def dispatch_contree(self, tokens: list[str]) -> None: """Dispatch a contree management command via argparse.""" try: - ns = self._parser.parse_args(tokens) + ns = self.parser.parse_args(tokens) except ShellParseError as exc: # status=0 means --help was triggered (already printed) if exc.status == 0: @@ -573,8 +623,13 @@ def handle_cd(self, args: list[str]) -> None: return self.cwd = self.session_store.resolve_path(target) - def handle_history(self, args: list[str]) -> None: - """Print shell history from the session database.""" + def handle_history(self, tokens: list[str]) -> None: + """Print shell history, optionally filtered by case-insensitive substring.""" + ns = parse_builtin(HISTORY_PARSER, tokens) + if ns is None: + return + args = HistoryArgs(search=ns.search) + try: lines 
= self.session_store.load_shell_history() except (LookupError, Exception): @@ -582,13 +637,17 @@ def handle_history(self, args: list[str]) -> None: if not lines: print("(no history)") return - # Optional: limit output with an argument (e.g. ``history 20``) - count = len(lines) - if args: - with contextlib.suppress(ValueError): - count = int(args[0]) - for i, line in enumerate(lines[-count:], start=max(1, len(lines) - count + 1)): - print(f" {i:5d} {line}") + + numbered = list(enumerate(lines, start=1)) + if args.search is not None: + needle = args.search.casefold() + numbered = [(n, line) for n, line in numbered if needle in line.casefold()] + if not numbered: + print(f"(no matches for {args.search!r})") + return + + for n, line in numbered: + print(f" {n:5d} {line}") @staticmethod def format_name(formatter: OutputFormatter) -> str: @@ -611,14 +670,23 @@ def handle_format_command(self, args: list[str]) -> None: return FORMATTER.set(FORMATTERS[name]()) - def handle_help(self, args: list[str]) -> None: + def handle_help(self, tokens: list[str]) -> None: """Show general shell help or help for a specific topic.""" - if not args: + # A topic can itself look like a flag (e.g. ``help -f``, + # ``help --format``). Prepend ``--`` so argparse treats anything + # other than ``-h``/``--help`` as a positional value. + if tokens and tokens[0].startswith("-") and tokens[0] not in ("-h", "--help"): + tokens = ["--", *tokens] + ns = parse_builtin(HELP_PARSER, tokens) + if ns is None: + return + args = HelpArgs(topic=ns.topic) + if args.topic is None: self.print_shell_help() return - topic = _HELP_ALIASES.get(args[0], args[0]) - if topic in _BUILTIN_HELP: - print(_BUILTIN_HELP[topic]) + topic = HELP_ALIASES.get(args.topic, args.topic) + if topic in BUILTIN_HELP: + print(BUILTIN_HELP[topic]) return # Delegate to the contree command's --help. 
self.dispatch_contree([topic, "--help"]) @@ -635,7 +703,7 @@ def print_shell_help(self) -> None: "Builtins:\n" " cd [PATH] Change working directory (cd - for previous)\n" " pwd Print working directory\n" - " history [N] Show command history (optional limit)\n" + " history [SEARCH] Show history (filter by case-insensitive substring)\n" " help [TOPIC] Show help for a command or builtin\n" " clear Clear the terminal screen\n" " --format [NAME] Change or show the output format\n" diff --git a/docs/commands/shell.md b/docs/commands/shell.md index f0a2316..cc04014 100644 --- a/docs/commands/shell.md +++ b/docs/commands/shell.md @@ -56,7 +56,7 @@ contree run -e DEBUG=1 -- ./app |---------|-------------| | `cd [PATH]` | Change working directory (`cd -` for previous) | | `pwd` | Print working directory | -| `history [N]` | Show command history (optional limit) | +| `history [SEARCH]` | Show command history, optionally filtered by substring | | `help [TOPIC]` | Show help (optionally for a specific command) | | `clear` | Clear the terminal screen | | `timeout DURATION CMD...` | Run `CMD...` with the API operation timeout set to `DURATION` | @@ -162,6 +162,21 @@ Path completions query the sandbox filesystem via the inspect API and are cached persistently -- subsequent completions for the same directory are instant. +## History search + +The `history` builtin takes an optional pattern and filters the +persisted history by case-insensitive substring: + +```text +contree:/> history # show every entry for this session +contree:/> history apt # any line containing "apt" +contree:/> history 'contree ' # exact "contree " (with trailing space) +contree:/> history make # any line containing "make" +``` + +History is per-session: searches see only the current `session_key`'s +entries. Up to 10,000 lines are kept; older lines are trimmed on save. 
+ ## Line continuation A trailing `\` at the end of input triggers a `> ` continuation prompt, diff --git a/docs/tutorial/shell.md b/docs/tutorial/shell.md index 0c9696c..d1abd6b 100644 --- a/docs/tutorial/shell.md +++ b/docs/tutorial/shell.md @@ -249,15 +249,18 @@ contree:/app> pwd ### `history` -Show command history for the current session: +Show command history for the current session, optionally filtered by a +case-insensitive substring: ```text -contree:/> history # show all -contree:/> history 20 # show last 20 entries +contree:/> history # show all entries +contree:/> history apt # only lines containing "apt" +contree:/> history 'contree ' # quoted match (note trailing space) ``` History is persisted in SQLite per session (up to 10,000 lines) and -restored when you re-enter the shell. +restored when you re-enter the shell. Search is scoped to the current +session key; different sessions have isolated history. ### `help` diff --git a/tests/test_shell_repl.py b/tests/test_shell_repl.py index 7cb1835..182dd4f 100644 --- a/tests/test_shell_repl.py +++ b/tests/test_shell_repl.py @@ -106,7 +106,7 @@ def fake_handler(args): # Patch the parser to use our fake handler with patch.object( - shell._parser, + shell.parser, "parse_args", ) as mock_parse: ns = MagicMock() @@ -126,7 +126,7 @@ def test_contree_api_error_caught(self, capsys, session_store): formatter = DefaultFormatter() FORMATTER.set(formatter) - with patch.object(shell._parser, "parse_args") as mock_parse: + with patch.object(shell.parser, "parse_args") as mock_parse: ns = MagicMock() ns.handler = MagicMock(side_effect=ApiError(404, "Not Found", "gone")) ns.load_args = MagicMock() @@ -144,7 +144,7 @@ def test_contree_keyboard_interrupt_caught(self, capsys): formatter = DefaultFormatter() FORMATTER.set(formatter) - with patch.object(shell._parser, "parse_args") as mock_parse: + with patch.object(shell.parser, "parse_args") as mock_parse: ns = MagicMock() ns.handler = MagicMock(side_effect=KeyboardInterrupt) 
ns.load_args = MagicMock() @@ -160,7 +160,7 @@ def test_contree_system_exit_caught(self, capsys): formatter = DefaultFormatter() FORMATTER.set(formatter) - with patch.object(shell._parser, "parse_args") as mock_parse: + with patch.object(shell.parser, "parse_args") as mock_parse: ns = MagicMock() ns.handler = MagicMock(side_effect=SystemExit(1)) ns.load_args = MagicMock() @@ -685,7 +685,7 @@ def test_per_command_override_uses_json(self): def fake_handler(args): captured_formatter["type"] = type(FORMATTER.get()) - with patch.object(shell._parser, "parse_args") as mock_parse: + with patch.object(shell.parser, "parse_args") as mock_parse: ns = MagicMock() ns.handler = fake_handler ns.load_args = MagicMock() @@ -702,7 +702,7 @@ def test_per_command_override_restores_original(self): original = DefaultFormatter() FORMATTER.set(original) - with patch.object(shell._parser, "parse_args") as mock_parse: + with patch.object(shell.parser, "parse_args") as mock_parse: ns = MagicMock() ns.handler = MagicMock() ns.load_args = MagicMock() @@ -719,7 +719,7 @@ def test_per_command_override_restores_on_exception(self): original = DefaultFormatter() FORMATTER.set(original) - with patch.object(shell._parser, "parse_args") as mock_parse: + with patch.object(shell.parser, "parse_args") as mock_parse: ns = MagicMock() ns.handler = MagicMock(side_effect=ApiError(500, "err", "")) ns.load_args = MagicMock() @@ -736,7 +736,7 @@ def test_no_format_flag_keeps_formatter(self): original = DefaultFormatter() FORMATTER.set(original) - with patch.object(shell._parser, "parse_args") as mock_parse: + with patch.object(shell.parser, "parse_args") as mock_parse: ns = MagicMock() ns.handler = MagicMock() ns.load_args = MagicMock() @@ -776,7 +776,7 @@ def test_help_history_prints_builtin(self, capsys): shell = _make_shell() shell.execute("help history") out = capsys.readouterr().out - assert "Usage: history" in out + assert "usage: history" in out.lower() def test_help_clear_prints_builtin(self, capsys): 
shell = _make_shell() @@ -851,7 +851,7 @@ def test_help_help_prints_builtin(self, capsys): shell = _make_shell() shell.execute("help help") out = capsys.readouterr().out - assert "Usage: help" in out + assert "usage: help" in out.lower() class TestFormatCommand: @@ -954,21 +954,90 @@ def test_history_with_lines(self, capsys): assert "ls /etc" in out assert "cat /etc/hosts" in out - def test_history_with_count_limit(self, capsys): + def test_history_filters_by_substring(self, capsys): + """``history PATTERN`` keeps only lines containing PATTERN.""" shell = _make_shell() with _mock_session() as store: store.load_shell_history.return_value = [ - "cmd-1", - "cmd-2", - "cmd-3", - "cmd-4", - "cmd-5", + "apt-get update", + "ls /etc", + "apt-get install curl", + "cat /etc/hosts", ] - shell.execute("history 2") + shell.execute("history apt") out = capsys.readouterr().out - assert "cmd-4" in out - assert "cmd-5" in out - assert "cmd-1" not in out + assert "apt-get update" in out + assert "apt-get install curl" in out + assert "ls /etc" not in out + assert "cat /etc/hosts" not in out + + def test_history_filter_is_case_insensitive(self, capsys): + shell = _make_shell() + with _mock_session() as store: + store.load_shell_history.return_value = ["LS /etc", "ls /tmp"] + shell.execute("history LS") + out = capsys.readouterr().out + assert "LS /etc" in out + assert "ls /tmp" in out + + def test_history_quoted_pattern_with_space(self, capsys): + """A quoted pattern with a trailing space matches `contree ` literally.""" + shell = _make_shell() + with _mock_session() as store: + store.load_shell_history.return_value = [ + "contree show UUID", + "contree-helper foo", + "apt install contree", + "contree run -- make", + ] + shell.execute("history 'contree '") + out = capsys.readouterr().out + assert "contree show UUID" in out + assert "contree run -- make" in out + # Trailing space excludes `contree-helper` (no space after contree) + assert "contree-helper foo" not in out + # And `apt 
install contree` (contree without trailing space inside line) + assert "apt install contree" not in out + + def test_history_no_matches_message(self, capsys): + shell = _make_shell() + with _mock_session() as store: + store.load_shell_history.return_value = ["ls /etc", "cat /etc/hosts"] + shell.execute("history xyzzy") + out = capsys.readouterr().out + assert "no matches" in out + assert "xyzzy" in out + + +class TestArgparseBuiltins: + """history and help are backed by ShellArgumentParser.""" + + def test_history_dash_h_prints_usage(self, capsys): + shell = _make_shell() + shell.execute("history --help") + out = capsys.readouterr().out + assert "usage: history" in out.lower() + assert "PATTERN" in out + + def test_history_unknown_flag_prints_error(self, capsys): + shell = _make_shell() + shell.execute("history --bogus") + err = capsys.readouterr().err + assert "history:" in err + + def test_help_dash_h_prints_usage(self, capsys): + shell = _make_shell() + shell.execute("help --help") + out = capsys.readouterr().out + assert "usage: help" in out.lower() + assert "TOPIC" in out + + def test_help_with_flag_like_topic_resolves(self, capsys): + """``help -f`` reaches BUILTIN_HELP via the HELP_ALIASES table.""" + shell = _make_shell() + shell.execute("help -f") + out = capsys.readouterr().out + assert "--format" in out class TestShellPrevention: @@ -977,7 +1046,7 @@ def test_shell_inside_shell_prints_error(self, capsys): formatter = DefaultFormatter() FORMATTER.set(formatter) - with patch.object(shell._parser, "parse_args") as mock_parse: + with patch.object(shell.parser, "parse_args") as mock_parse: ns = MagicMock() ns.command = "shell" ns.handler = MagicMock() @@ -1031,7 +1100,7 @@ def test_dispatch_contree_general_exception_logged(self, capsys): formatter = DefaultFormatter() FORMATTER.set(formatter) - with patch.object(shell._parser, "parse_args") as mock_parse: + with patch.object(shell.parser, "parse_args") as mock_parse: ns = MagicMock() ns.command = "ls" 
ns.handler = MagicMock(side_effect=RuntimeError("oops")) @@ -1065,40 +1134,40 @@ def test_nvim_dispatches_edit(self): class TestReadContinuation: def test_complete_line_returns_unchanged(self): - result = ContreeShell._read_continuation("ls /etc") + result = ContreeShell.read_continuation("ls /etc") assert result == "ls /etc" def test_trailing_backslash_joins_lines(self): with patch("builtins.input", return_value="/etc"): - result = ContreeShell._read_continuation("ls \\") + result = ContreeShell.read_continuation("ls \\") # backslash-newline removed, space before \ provides separation assert result == "ls /etc" def test_unclosed_quote_preserves_newline(self): with patch("builtins.input", return_value='world"'): - result = ContreeShell._read_continuation('echo "hello') + result = ContreeShell.read_continuation('echo "hello') # bare newline inside quotes is preserved (no preceding \) assert result == 'echo "hello\nworld"' def test_ctrl_c_cancels_continuation(self): with patch("builtins.input", side_effect=KeyboardInterrupt): - result = ContreeShell._read_continuation("ls \\") + result = ContreeShell.read_continuation("ls \\") assert result == "" def test_ctrl_d_cancels_continuation(self): with patch("builtins.input", side_effect=EOFError): - result = ContreeShell._read_continuation("ls \\") + result = ContreeShell.read_continuation("ls \\") assert result == "" def test_multi_line_continuation(self): inputs = iter(["/a \\", "/b"]) with patch("builtins.input", side_effect=inputs): - result = ContreeShell._read_continuation("ls \\") + result = ContreeShell.read_continuation("ls \\") assert result == "ls /a /b" def test_continuation_produces_correct_tokens(self): with patch("builtins.input", side_effect=["-alh \\", "/sys"]): - result = ContreeShell._read_continuation("ls \\") + result = ContreeShell.read_continuation("ls \\") import shlex assert shlex.split(result) == ["ls", "-alh", "/sys"] From 7b90093316d48eedbbce0b6ded1478a12fd16651 Mon Sep 17 00:00:00 2001 From: 
Dmitry Orlov Date: Tue, 12 May 2026 23:12:33 +0200 Subject: [PATCH 4/9] add contree build --- contree_cli/agent.md | 24 +- contree_cli/arguments.py | 2 + contree_cli/cli/build.py | 224 ++++++++++++++++++ contree_cli/cli/file.py | 2 +- contree_cli/cli/run.py | 5 +- contree_cli/docker/__init__.py | 42 ++++ contree_cli/docker/context.py | 165 ++++++++++++++ contree_cli/docker/dockerignore.py | 102 +++++++++ contree_cli/docker/keyword.py | 86 +++++++ contree_cli/docker/kw_add.py | 60 +++++ contree_cli/docker/kw_arg.py | 36 +++ contree_cli/docker/kw_copy.py | 159 +++++++++++++ contree_cli/docker/kw_env.py | 30 +++ contree_cli/docker/kw_from.py | 113 ++++++++++ contree_cli/docker/kw_run.py | 184 +++++++++++++++ contree_cli/docker/kw_skipped.py | 27 +++ contree_cli/docker/kw_user.py | 28 +++ contree_cli/docker/kw_workdir.py | 34 +++ contree_cli/docker/local_context.py | 159 +++++++++++++ contree_cli/docker/parser.py | 93 ++++++++ contree_cli/skill_body.md | 31 +++ contree_cli/types.py | 5 + docs/commands/index.md | 1 + tests/test_build.py | 337 ++++++++++++++++++++++++++++ tests/test_dockerfile.py | 197 ++++++++++++++++ tests/test_dockerignore.py | 117 ++++++++++ tests/test_run.py | 10 +- 27 files changed, 2264 insertions(+), 9 deletions(-) create mode 100644 contree_cli/cli/build.py create mode 100644 contree_cli/docker/__init__.py create mode 100644 contree_cli/docker/context.py create mode 100644 contree_cli/docker/dockerignore.py create mode 100644 contree_cli/docker/keyword.py create mode 100644 contree_cli/docker/kw_add.py create mode 100644 contree_cli/docker/kw_arg.py create mode 100644 contree_cli/docker/kw_copy.py create mode 100644 contree_cli/docker/kw_env.py create mode 100644 contree_cli/docker/kw_from.py create mode 100644 contree_cli/docker/kw_run.py create mode 100644 contree_cli/docker/kw_skipped.py create mode 100644 contree_cli/docker/kw_user.py create mode 100644 contree_cli/docker/kw_workdir.py create mode 100644 contree_cli/docker/local_context.py 
create mode 100644 contree_cli/docker/parser.py create mode 100644 tests/test_build.py create mode 100644 tests/test_dockerfile.py create mode 100644 tests/test_dockerignore.py diff --git a/contree_cli/agent.md b/contree_cli/agent.md index 1fecede..ce8026e 100644 --- a/contree_cli/agent.md +++ b/contree_cli/agent.md @@ -134,7 +134,28 @@ Tag conventions: Always search before building: contree images --prefix=python-dev -More: contree images --help, contree tag --help +Building from a Dockerfile: + When a project already ships a Dockerfile, prefer `contree build` + over hand-running each step. It executes FROM/RUN/COPY/WORKDIR/ENV + /ARG/USER against the API and caches every layer as a branch so + rebuilds are fast. + + Layer cache is keyed by abspath(context), shared across invocations: + contree build . build ./Dockerfile, no tag + contree build . --tag myapp:dev build + tag the final image + contree build ./app --dockerfile ./app/Dockerfile.prod --tag svc:prod + contree build . --build-arg VERSION=1.2 + contree build . --no-cache force rebuild + + Supported directives: FROM, RUN, COPY, ADD (local paths only), + WORKDIR, ENV, ARG, USER. CMD/ENTRYPOINT/LABEL/EXPOSE/VOLUME/etc. + are parsed but skipped with a warning. Multi-stage (AS / --from) + is not yet supported. + + .dockerignore is applied to every COPY/ADD walk on top of the + default exclude list (.git, __pycache__, node_modules, etc.). 
+ +More: contree build --help, contree images --help, contree tag --help Files and directories ===================== @@ -373,6 +394,7 @@ All commands use [IMAGE] Set or show session image (aliases: ci) run [-- CMD] Spawn sandbox instance (aliases: r) + build [CONTEXT] Build image from Dockerfile (aliases: bd) images List/import images (aliases: i, img) tag [IMAGE] TAG Tag image (aliases: t) ps List operations diff --git a/contree_cli/arguments.py b/contree_cli/arguments.py index 98d5773..ccde59b 100644 --- a/contree_cli/arguments.py +++ b/contree_cli/arguments.py @@ -6,6 +6,7 @@ from contree_cli.cli import ( agent, auth, + build, cat, cd, cp, @@ -210,6 +211,7 @@ def register( register("use", "Set or show current session image", use.setup_parser, aliases=["ci"]) register("run", "Spawn a sandbox instance", run.setup_parser, aliases=["r"]) +register("build", "Build image from Dockerfile", build.setup_parser, aliases=["bd"]) register("images", "List and import images", images.setup_parser, aliases=["i", "img"]) register("tag", "Tag an image", tag.setup_parser, aliases=["t"]) register("ps", "List operations/instances", ps.setup_parser) diff --git a/contree_cli/cli/build.py b/contree_cli/cli/build.py new file mode 100644 index 0000000..e6b1453 --- /dev/null +++ b/contree_cli/cli/build.py @@ -0,0 +1,224 @@ +"""Build an image from a Dockerfile. + +Reads the Dockerfile at the given path (default ``/Dockerfile``) +and applies each directive against an isolated build session keyed by +the absolute path of the context directory. Successful layers are +materialised as branches named ``layer:`` so that +re-running the same Dockerfile reuses prior work. + +Supported directives (MVP): FROM, RUN, COPY, ADD (without URL/tar), +WORKDIR, ENV, ARG, USER. Other Dockerfile directives parse cleanly +but are skipped with a warning (CMD, ENTRYPOINT, LABEL, EXPOSE, +VOLUME, STOPSIGNAL, MAINTAINER, HEALTHCHECK, ONBUILD, SHELL). 
+""" + +from __future__ import annotations + +import argparse +import hashlib +import logging +from dataclasses import dataclass, field +from pathlib import Path + +from contree_cli import ( + CLIENT, + FORMATTER, + PROFILE, + SESSION_STORE, + ArgumentsProtocol, + SetupResult, +) +from contree_cli.docker import ( + ArgKeyword, + BuildContext, + DockerKeyword, + FromKeyword, + LocalContext, + RunKeyword, + parse_dockerfile, +) +from contree_cli.docker.context import BUILD_TIMEOUT_DEFAULT +from contree_cli.session import SessionStore +from contree_cli.types import FLAGS + +logger = logging.getLogger(__name__) + +EPILOG = """\ +examples: + contree build . + contree build . --tag myimage:latest + contree build --dockerfile ./Dockerfile.test ./app + contree build --build-arg VERSION=1.2 . + contree build --no-cache . + +for coding agents: + mutating command, may create operations against the API + layer cache is per-context (session keyed by abspath(context)) + use --no-cache to bypass cached layers and rebuild from scratch +""" + + +@dataclass(frozen=True) +class BuildArgs(ArgumentsProtocol): + context: str = "." + dockerfile: str = "" + tag: str = "" + build_args: tuple[str, ...] 
= field(default_factory=tuple) + no_cache: bool = False + timeout: int = BUILD_TIMEOUT_DEFAULT + + @classmethod + def from_args(cls, ns: argparse.Namespace) -> BuildArgs: + return cls( + context=ns.context or ".", + dockerfile=ns.dockerfile or "", + tag=ns.tag or "", + build_args=tuple(ns.build_arg or ()), + no_cache=bool(ns.no_cache), + timeout=ns.timeout, + ) + + +def setup_parser(p: argparse.ArgumentParser) -> SetupResult: + p.add_argument( + "context", + nargs="?", + default=".", + help="Build context directory", + ) + p.add_argument( + *FLAGS["dockerfile"], + default="", + metavar="PATH", + help="Dockerfile path (default: /Dockerfile)", + ) + p.add_argument( + *FLAGS["tag_name"], + default="", + metavar="NAME[:TAG]", + help="Tag the final image", + ) + p.add_argument( + *FLAGS["build_arg"], + action="append", + default=[], + metavar="KEY=VALUE", + help="Build-time variable (repeatable)", + ) + p.add_argument( + *FLAGS["no_cache"], + action="store_true", + help="Ignore cached layers and rebuild", + ) + p.add_argument( + *FLAGS["timeout"], + type=int, + default=BUILD_TIMEOUT_DEFAULT, + help="Timeout in seconds for each RUN step", + ) + return cmd_build, BuildArgs + + +def cmd_build(args: BuildArgs) -> int | None: + context_dir = Path(args.context).expanduser().resolve() + if not context_dir.is_dir(): + logger.error("context %s is not a directory", context_dir) + return 1 + + dockerfile_path = ( + Path(args.dockerfile).expanduser() + if args.dockerfile + else context_dir / "Dockerfile" + ) + if not dockerfile_path.is_file(): + logger.error("Dockerfile %s not found", dockerfile_path) + return 1 + + text = dockerfile_path.read_text() + try: + directives = parse_dockerfile(text) + except ValueError as exc: + logger.error("Dockerfile parse error: %s", exc) + return 1 + + if not validate_first_directive(directives): + logger.error("Dockerfile must contain a FROM directive") + return 1 + + build_args = parse_build_args(args.build_args) + + profile = PROFILE.get() + 
client = CLIENT.get() + session_key = make_session_key(context_dir) + store = SessionStore(profile.session_db_path, session_key) + SESSION_STORE.set(store) + + ctx = BuildContext( + client=client, + store=store, + local=LocalContext.from_dir(context_dir), + build_args=build_args, + no_cache=args.no_cache, + timeout=args.timeout, + ) + + try: + for kw in directives: + kw.execute(ctx) + finalize_pending(ctx) + except Exception as exc: + logger.error("build failed: %s", exc) + return 1 + + if not ctx.last_image: + logger.error("build produced no image") + return 1 + + if args.tag: + client.patch_json( + f"/v1/images/{ctx.last_image}/tag", + {"tag": args.tag}, + ) + logger.info("tagged %s as %s", ctx.last_image, args.tag) + + formatter = FORMATTER.get() + formatter( + image=ctx.last_image, + tag=args.tag, + session=session_key, + ) + formatter.flush() + return None + + +def validate_first_directive(directives: list[DockerKeyword]) -> bool: + for d in directives: + if isinstance(d, FromKeyword): + return True + if isinstance(d, ArgKeyword): + continue + return False + return False + + +def parse_build_args(items: tuple[str, ...]) -> dict[str, str]: + out: dict[str, str] = {} + for item in items: + if "=" not in item: + raise ValueError(f"--build-arg expected KEY=VALUE, got {item!r}") + k, _, v = item.partition("=") + out[k] = v + return out + + +def make_session_key(context_dir: Path) -> str: + digest = hashlib.sha256(str(context_dir).encode()).hexdigest() + return f"build:{digest[:16]}" + + +def finalize_pending(ctx: BuildContext) -> None: + """If COPY/ADD left files pending, commit them via a trivial RUN.""" + if not ctx.pending: + return + closer = RunKeyword(parts=(":",), shell_form=True) + closer.execute(ctx) diff --git a/contree_cli/cli/file.py b/contree_cli/cli/file.py index 9f42648..165be63 100644 --- a/contree_cli/cli/file.py +++ b/contree_cli/cli/file.py @@ -123,7 +123,7 @@ def _upload_and_record( """Upload a local file (with dedup) and record as pending.""" 
sha = _file_sha256(local_path) try: - resp = client.get("/v1/files", params={"sha256": sha}) + resp = client.get(f"/v1/files/{sha}") file_uuid = json.loads(resp.read())["uuid"] logger.info("File already exists on server (%s)", file_uuid) except ApiError as exc: diff --git a/contree_cli/cli/run.py b/contree_cli/cli/run.py index deb0d81..c7a2bd7 100644 --- a/contree_cli/cli/run.py +++ b/contree_cli/cli/run.py @@ -384,10 +384,11 @@ def record_local_uuid(mf: MappedFile, file_uuid: str, store: SessionStore) -> No def upload_one_remote(client: ContreeClient, mf: MappedFile) -> tuple[MappedFile, str]: """HTTP-only upload (sha256 dedup + POST /v1/files). Thread-safe.""" + sha = mf.sha256() try: - resp = client.get("/v1/files", params={"sha256": mf.sha256()}) + resp = client.get(f"/v1/files/{sha}") file_uuid = str(json.loads(resp.read())["uuid"]) - logger.info("Uploaded file: %s -> %s", mf.host_path, file_uuid) + logger.info("File reused: %s -> %s", mf.host_path, file_uuid) return mf, file_uuid except ApiError as exc: if exc.status != 404: diff --git a/contree_cli/docker/__init__.py b/contree_cli/docker/__init__.py new file mode 100644 index 0000000..1802208 --- /dev/null +++ b/contree_cli/docker/__init__.py @@ -0,0 +1,42 @@ +"""Dockerfile parser and keyword interpreters. + +Each Dockerfile directive is a ``DockerKeyword`` subclass living in its +own module. ``parse_dockerfile`` returns a list of directives ready to +be executed against a ``BuildContext``. 
+""" + +from .context import BuildContext, PendingFile +from .dockerignore import DockerignoreRule, is_ignored, parse_dockerignore +from .keyword import DockerKeyword, substitute +from .kw_add import AddKeyword +from .kw_arg import ArgKeyword +from .kw_copy import CopyKeyword +from .kw_env import EnvKeyword +from .kw_from import FromKeyword +from .kw_run import RunKeyword +from .kw_skipped import SkippedKeyword +from .kw_user import UserKeyword +from .kw_workdir import WorkdirKeyword +from .local_context import LocalContext +from .parser import parse_dockerfile + +__all__ = [ + "AddKeyword", + "ArgKeyword", + "BuildContext", + "CopyKeyword", + "DockerKeyword", + "DockerignoreRule", + "EnvKeyword", + "FromKeyword", + "LocalContext", + "PendingFile", + "RunKeyword", + "SkippedKeyword", + "UserKeyword", + "WorkdirKeyword", + "is_ignored", + "parse_dockerfile", + "parse_dockerignore", + "substitute", +] diff --git a/contree_cli/docker/context.py b/contree_cli/docker/context.py new file mode 100644 index 0000000..8e99cc9 --- /dev/null +++ b/contree_cli/docker/context.py @@ -0,0 +1,165 @@ +"""Mutable state shared across one ``contree build`` invocation.""" + +from __future__ import annotations + +import contextlib +import hashlib +import json +import logging +from dataclasses import dataclass, field + +from contree_cli.client import ContreeClient +from contree_cli.session import SessionStore + +from .local_context import LocalContext + +logger = logging.getLogger(__name__) + +BUILD_TIMEOUT_DEFAULT = 600 + + +@dataclass +class PendingFile: + instance_path: str + file_uuid: str + sha256: str + uid: int + gid: int + mode: str # octal like "0644" + + +@dataclass +class BuildContext: + client: ContreeClient + store: SessionStore + local: LocalContext + build_args: dict[str, str] = field(default_factory=dict) + declared_args: set[str] = field(default_factory=set) + arg_defaults: dict[str, str] = field(default_factory=dict) + env: dict[str, str] = field(default_factory=dict) + 
workdir: str = "/" + user: str = "" + parent_hash: str = "" + pending: list[PendingFile] = field(default_factory=list) + no_cache: bool = False + timeout: int = BUILD_TIMEOUT_DEFAULT + last_image: str = "" + last_op_uuid: str = "" + + def arg_values(self) -> dict[str, str]: + """Effective values for every declared ARG (build-arg overrides default).""" + return { + name: self.build_args.get(name, self.arg_defaults.get(name, "")) + for name in self.declared_args + } + + def substitute(self, text: str) -> str: + from .keyword import substitute + + merged = {**self.arg_values(), **self.env} + return substitute(text, merged) + + def state_repr(self) -> str: + return json.dumps( + { + "workdir": self.workdir, + "user": self.user, + "env": sorted(self.env.items()), + "args": sorted(self.arg_values().items()), + }, + sort_keys=True, + ) + + def pending_repr(self) -> str: + return json.dumps( + [ + { + "path": p.instance_path, + "sha": p.sha256, + "uid": p.uid, + "gid": p.gid, + "mode": p.mode, + } + for p in self.pending + ], + sort_keys=True, + ) + + def chain(self, contribution: str) -> str: + h = hashlib.sha256() + h.update(self.parent_hash.encode()) + h.update(b"\x00") + h.update(self.state_repr().encode()) + h.update(b"\x00") + h.update(contribution.encode()) + h.update(b"\x00") + h.update(self.pending_repr().encode()) + return h.hexdigest() + + @staticmethod + def short_hash(full: str) -> str: + return full[:16] + + def pending_files_payload(self) -> dict[str, object]: + return { + p.instance_path: { + "uuid": p.file_uuid, + "uid": p.uid, + "gid": p.gid, + "mode": p.mode, + } + for p in self.pending + } + + def try_cache_hit(self, branch_name: str) -> str | None: + """Return cached image_uuid if ``branch_name`` exists and cache is enabled.""" + if self.no_cache: + return None + try: + tip = self.store.branch_tip(branch_name) + except ValueError: + return None + self.store.switch_branch(branch_name) + self.last_image = tip.image_uuid + logger.info("layer cache hit: %s 
-> %s", branch_name, tip.image_uuid) + return tip.image_uuid + + def commit_layer( + self, + branch_name: str, + image_uuid: str, + *, + kind: str, + title: str, + operation_uuid: str = "", + ) -> None: + """Materialize a fresh layer branch pointing at ``image_uuid``. + + Forks from the currently active branch (the parent layer). When the + session is brand-new and has no active branch, the first ``set_image`` + bootstraps the implicit ``main`` branch before we fork. + """ + with contextlib.suppress(ValueError): + self.store.delete_branch(branch_name) + + if self.store.session is None: + self.store.set_image( + image_uuid, + kind=kind, + title=title, + operation_uuid=operation_uuid, + ) + self.store.create_branch(branch_name) + self.store.switch_branch(branch_name) + else: + self.store.create_branch(branch_name) + self.store.switch_branch(branch_name) + self.store.set_image( + image_uuid, + kind=kind, + title=title, + operation_uuid=operation_uuid, + ) + + self.last_image = image_uuid + self.last_op_uuid = operation_uuid diff --git a/contree_cli/docker/dockerignore.py b/contree_cli/docker/dockerignore.py new file mode 100644 index 0000000..def72c5 --- /dev/null +++ b/contree_cli/docker/dockerignore.py @@ -0,0 +1,102 @@ +"""Parse and match ``.dockerignore`` rules against build-context paths. + +Rules are matched in order against POSIX-style paths relative to the context +root. The last matching rule wins (``!`` re-includes a previously ignored +path). Globs: ``*`` matches anything except ``/``, ``**`` matches zero or +more path components, ``?`` matches one character, ``[...]`` is a class. 
+""" + +from __future__ import annotations + +import re +from dataclasses import dataclass +from pathlib import Path + + +@dataclass(frozen=True) +class DockerignoreRule: + negate: bool + regex: re.Pattern[str] + raw: str + + +def parse_dockerignore(context_dir: Path) -> tuple[DockerignoreRule, ...]: + """Read ``.dockerignore`` from ``context_dir`` and return the rule list.""" + path = context_dir / ".dockerignore" + if not path.is_file(): + return () + rules: list[DockerignoreRule] = [] + for raw_line in path.read_text().splitlines(): + line = raw_line.strip() + if not line or line.startswith("#"): + continue + negate = line.startswith("!") + if negate: + line = line[1:].strip() + regex_str = pattern_to_regex(line) + rules.append( + DockerignoreRule( + negate=negate, + regex=re.compile(regex_str), + raw=raw_line, + ) + ) + return tuple(rules) + + +def is_ignored(rel_path: str, rules: tuple[DockerignoreRule, ...]) -> bool: + """Apply rules in order; last match wins (negation re-includes).""" + ignored = False + for rule in rules: + if rule.regex.fullmatch(rel_path): + ignored = not rule.negate + return ignored + + +def pattern_to_regex(pattern: str) -> str: + """Translate a ``.dockerignore`` glob into a Python regex string. + + Handles trailing ``/`` (directory + all contents), ``**`` (any number of + path components), ``*`` (one segment), ``?`` (one char), ``[...]`` class. + Other regex metacharacters are escaped. 
+ """ + is_dir = pattern.endswith("/") + if is_dir: + pattern = pattern.rstrip("/") + pattern = pattern.lstrip("/") + + out: list[str] = [] + i = 0 + while i < len(pattern): + match pattern[i : i + 3], pattern[i : i + 2], pattern[i]: + case ("**/", _, _): + out.append("(?:.*/)?") + i += 3 + case (_, "**", _): + out.append(".*") + i += 2 + case (_, _, "*"): + out.append("[^/]*") + i += 1 + case (_, _, "?"): + out.append("[^/]") + i += 1 + case (_, _, "["): + end = pattern.find("]", i + 1) + if end == -1: + out.append(re.escape("[")) + i += 1 + else: + out.append(pattern[i : end + 1]) + i = end + 1 + case (_, _, "/"): + out.append("/") + i += 1 + case (_, _, ch): + out.append(re.escape(ch)) + i += 1 + + regex = "".join(out) + if is_dir: + regex += "(?:/.*)?" + return regex diff --git a/contree_cli/docker/keyword.py b/contree_cli/docker/keyword.py new file mode 100644 index 0000000..7482df4 --- /dev/null +++ b/contree_cli/docker/keyword.py @@ -0,0 +1,86 @@ +"""Base class for Dockerfile keywords plus shared helpers.""" + +from __future__ import annotations + +import json +import logging +import re +from dataclasses import dataclass +from typing import ClassVar + +from .context import BuildContext + +logger = logging.getLogger(__name__) + +SUB_RE = re.compile(r"\$(?:\{([A-Za-z_][A-Za-z0-9_]*)\}|([A-Za-z_][A-Za-z0-9_]*))") + + +def substitute(text: str, env: dict[str, str]) -> str: + """Expand ``$VAR`` / ``${VAR}`` against ``env``. Missing names expand to ''.""" + + def repl(m: re.Match[str]) -> str: + name = m.group(1) or m.group(2) + return env.get(name, "") + + return SUB_RE.sub(repl, text) + + +def parse_command_form(rest: str) -> tuple[list[str], bool]: + """Parse the argument to ``RUN``/``COPY``/``ADD``/``CMD``. + + Returns ``(parts, shell_form)`` where ``shell_form`` is ``True`` when the + directive used the bare shell syntax. ``parts`` for shell-form contains a + single joined string. JSON exec-form returns the list as-is. 
+ """ + stripped = rest.lstrip() + if stripped.startswith("["): + try: + parsed = json.loads(stripped) + except ValueError as exc: + raise ValueError(f"invalid JSON exec-form: {rest!r}") from exc + if not isinstance(parsed, list) or not all(isinstance(p, str) for p in parsed): + raise ValueError(f"exec-form must be a list of strings: {rest!r}") + return list(parsed), False + return [rest], True + + +def parse_keyval_pairs(rest: str) -> dict[str, str]: + """Parse ``KEY1=VAL1 KEY2=VAL2`` or the single-pair form ``KEY VAL``. + + Quoted values are supported via ``shlex``-style splitting on whitespace. + """ + import shlex + + tokens = shlex.split(rest) + if not tokens: + return {} + if "=" not in tokens[0]: + # Legacy form: ENV KEY VALUE (whole rest after first token is the value) + key = tokens[0] + value = rest.split(None, 1)[1] if len(rest.split(None, 1)) > 1 else "" + return {key: value.strip()} + pairs: dict[str, str] = {} + for t in tokens: + if "=" not in t: + raise ValueError(f"expected KEY=VALUE, got {t!r}") + k, _, v = t.partition("=") + pairs[k] = v + return pairs + + +@dataclass(frozen=True) +class DockerKeyword: + """Base class. Subclasses implement ``parse``, ``serialize``, ``execute``.""" + + NAME: ClassVar[str] = "" + + @classmethod + def parse(cls, args_text: str) -> DockerKeyword: + raise NotImplementedError + + def serialize(self) -> str: + """Stable text used for layer hashing.""" + raise NotImplementedError + + def execute(self, ctx: BuildContext) -> None: + raise NotImplementedError diff --git a/contree_cli/docker/kw_add.py b/contree_cli/docker/kw_add.py new file mode 100644 index 0000000..d4688ef --- /dev/null +++ b/contree_cli/docker/kw_add.py @@ -0,0 +1,60 @@ +"""``ADD [--chown=...] [--chmod=...] SRC... DEST`` - file/dir variant of COPY. + +URL fetches and tar extraction (the parts of ``ADD`` that distinguish it from +``COPY``) are not supported in the MVP - those inputs emit a warning and are +skipped. 
+""" + +from __future__ import annotations + +import json +import logging +from dataclasses import dataclass, field +from typing import ClassVar + +from .context import BuildContext +from .keyword import DockerKeyword +from .kw_copy import parse_copy_like, stage_copy + +logger = logging.getLogger(__name__) + + +@dataclass(frozen=True) +class AddKeyword(DockerKeyword): + NAME: ClassVar[str] = "ADD" + sources: tuple[str, ...] = field(default_factory=tuple) + dest: str = "" + chown: str = "" + chmod: str = "" + from_stage: str = "" + + @classmethod + def parse(cls, args_text: str) -> AddKeyword: + return parse_copy_like(cls, args_text, "ADD") + + def serialize(self) -> str: + return ( + f"ADD chown={self.chown} chmod={self.chmod} " + f"sources={json.dumps(list(self.sources))} dest={self.dest}" + ) + + def execute(self, ctx: BuildContext) -> None: + if self.from_stage: + logger.warning("ADD --from=%s not supported, skipping", self.from_stage) + return + + url_sources = [s for s in self.sources if is_url(s)] + if url_sources: + for url in url_sources: + logger.warning("ADD URL %s not supported, skipping", url) + local_sources = tuple(s for s in self.sources if not is_url(s)) + if not local_sources: + return + stage_copy(ctx, local_sources, self.dest, self.chown, self.chmod) + return + + stage_copy(ctx, self.sources, self.dest, self.chown, self.chmod) + + +def is_url(value: str) -> bool: + return value.startswith(("http://", "https://", "ftp://")) diff --git a/contree_cli/docker/kw_arg.py b/contree_cli/docker/kw_arg.py new file mode 100644 index 0000000..f8769a7 --- /dev/null +++ b/contree_cli/docker/kw_arg.py @@ -0,0 +1,36 @@ +"""``ARG NAME[=DEFAULT]`` - declare a build-time variable.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import ClassVar + +from .context import BuildContext +from .keyword import DockerKeyword + + +@dataclass(frozen=True) +class ArgKeyword(DockerKeyword): + NAME: ClassVar[str] = "ARG" + name: str = "" 
+ default: str | None = None + + @classmethod + def parse(cls, args_text: str) -> ArgKeyword: + raw = args_text.strip() + if not raw: + raise ValueError("ARG requires a name") + if "=" in raw: + name, _, default = raw.partition("=") + return cls(name=name.strip(), default=default.strip()) + return cls(name=raw, default=None) + + def serialize(self) -> str: + if self.default is None: + return f"ARG {self.name}" + return f"ARG {self.name}={self.default}" + + def execute(self, ctx: BuildContext) -> None: + ctx.declared_args.add(self.name) + if self.default is not None and self.name not in ctx.arg_defaults: + ctx.arg_defaults[self.name] = self.default diff --git a/contree_cli/docker/kw_copy.py b/contree_cli/docker/kw_copy.py new file mode 100644 index 0000000..65df59d --- /dev/null +++ b/contree_cli/docker/kw_copy.py @@ -0,0 +1,159 @@ +"""``COPY [--chown=...] [--chmod=...] SRC... DEST`` - stage files into the build.""" + +from __future__ import annotations + +import json +import logging +import posixpath +import shlex +from dataclasses import dataclass, field +from typing import ClassVar, TypeVar + +from contree_cli.cli.run import upload_files + +from .context import BuildContext, PendingFile +from .keyword import DockerKeyword + +logger = logging.getLogger(__name__) + +T = TypeVar("T", bound=DockerKeyword) + + +@dataclass(frozen=True) +class CopyKeyword(DockerKeyword): + NAME: ClassVar[str] = "COPY" + sources: tuple[str, ...] 
= field(default_factory=tuple) + dest: str = "" + chown: str = "" + chmod: str = "" + from_stage: str = "" + + @classmethod + def parse(cls, args_text: str) -> CopyKeyword: + return parse_copy_like(cls, args_text, "COPY") + + def serialize(self) -> str: + return ( + f"COPY chown={self.chown} chmod={self.chmod} " + f"sources={json.dumps(list(self.sources))} dest={self.dest}" + ) + + def execute(self, ctx: BuildContext) -> None: + if self.from_stage: + logger.warning("COPY --from=%s not supported, skipping", self.from_stage) + return + stage_copy(ctx, self.sources, self.dest, self.chown, self.chmod) + + +def parse_copy_like(cls: type[T], args_text: str, label: str) -> T: + """Shared parser for COPY and ADD shell-style syntax.""" + raw = args_text.strip() + if not raw: + raise ValueError(f"{label} requires SRC and DEST") + stripped = raw.lstrip() + if stripped.startswith("["): + try: + parsed = json.loads(stripped) + except ValueError as exc: + raise ValueError(f"invalid JSON exec-form: {raw!r}") from exc + if ( + not isinstance(parsed, list) + or len(parsed) < 2 + or not all(isinstance(p, str) for p in parsed) + ): + raise ValueError(f"{label} exec-form must be a list of >=2 strings") + return cls(sources=tuple(parsed[:-1]), dest=parsed[-1]) # type: ignore[call-arg] + + tokens = shlex.split(raw) + chown = "" + chmod = "" + from_stage = "" + positional: list[str] = [] + for t in tokens: + if t.startswith("--chown="): + chown = t.partition("=")[2] + elif t.startswith("--chmod="): + chmod = t.partition("=")[2] + elif t.startswith("--from="): + from_stage = t.partition("=")[2] + elif t.startswith("--"): + raise ValueError(f"unknown {label} option: {t!r}") + else: + positional.append(t) + if len(positional) < 2: + raise ValueError(f"{label} requires at least one source and a destination") + return cls( # type: ignore[call-arg] + sources=tuple(positional[:-1]), + dest=positional[-1], + chown=chown, + chmod=chmod, + from_stage=from_stage, + ) + + +def stage_copy( + ctx: 
BuildContext, + sources: tuple[str, ...], + dest: str, + chown: str, + chmod: str, +) -> None: + """Resolve sources via ``LocalContext``, upload, append to ``ctx.pending``.""" + sub_sources = tuple(ctx.substitute(s) for s in sources) + sub_dest = ctx.substitute(dest) + sub_chown = ctx.substitute(chown) + sub_chmod = ctx.substitute(chmod) + + if not posixpath.isabs(sub_dest): + sub_dest = posixpath.normpath(posixpath.join(ctx.workdir or "/", sub_dest)) + + uid, gid = parse_chown(sub_chown) + mode_override = parse_chmod(sub_chmod) + + mapped = ctx.local.collect( + sub_sources, + sub_dest, + uid=uid, + gid=gid, + mode_override=mode_override, + ) + if not mapped: + return + + uploaded = upload_files(ctx.client, mapped, ctx.store) + for mf in mapped: + ctx.pending.append( + PendingFile( + instance_path=mf.instance_path, + file_uuid=uploaded[mf.host_path], + sha256=mf.sha256(), + uid=mf.uid, + gid=mf.gid, + mode=f"{mf.mode:04o}", + ) + ) + + +def parse_chown(spec: str) -> tuple[int, int]: + if not spec: + return 0, 0 + user, _, group = spec.partition(":") + uid = resolve_id(user) if user else 0 + gid = resolve_id(group) if group else uid + return uid, gid + + +def parse_chmod(spec: str) -> int | None: + if not spec: + return None + try: + return int(spec, 8) + except ValueError: + raise ValueError(f"invalid chmod value: {spec!r}") from None + + +def resolve_id(value: str) -> int: + try: + return int(value) + except ValueError: + return 0 diff --git a/contree_cli/docker/kw_env.py b/contree_cli/docker/kw_env.py new file mode 100644 index 0000000..11253db --- /dev/null +++ b/contree_cli/docker/kw_env.py @@ -0,0 +1,30 @@ +"""``ENV KEY=VALUE [KEY=VALUE ...]`` - set persistent environment variables.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import ClassVar + +from .context import BuildContext +from .keyword import DockerKeyword, parse_keyval_pairs + + +@dataclass(frozen=True) +class EnvKeyword(DockerKeyword): + NAME: 
ClassVar[str] = "ENV" + pairs: tuple[tuple[str, str], ...] = field(default_factory=tuple) + + @classmethod + def parse(cls, args_text: str) -> EnvKeyword: + raw = args_text.strip() + if not raw: + raise ValueError("ENV requires KEY=VALUE") + pairs = parse_keyval_pairs(raw) + return cls(pairs=tuple(pairs.items())) + + def serialize(self) -> str: + return "ENV " + " ".join(f"{k}={v}" for k, v in self.pairs) + + def execute(self, ctx: BuildContext) -> None: + for key, value in self.pairs: + ctx.env[key] = ctx.substitute(value) diff --git a/contree_cli/docker/kw_from.py b/contree_cli/docker/kw_from.py new file mode 100644 index 0000000..7c00789 --- /dev/null +++ b/contree_cli/docker/kw_from.py @@ -0,0 +1,113 @@ +"""``FROM image[:tag] [AS name]`` - set the base image for the build.""" + +from __future__ import annotations + +import contextlib +import hashlib +import json +import logging +import time +from dataclasses import dataclass +from typing import ClassVar + +from contree_cli.cli.images import normalize_registry_url +from contree_cli.client import ApiError, resolve_image + +from .context import BuildContext +from .keyword import DockerKeyword + +logger = logging.getLogger(__name__) + +TERMINAL_STATUSES = frozenset({"SUCCESS", "FAILED", "CANCELLED"}) + + +@dataclass(frozen=True) +class FromKeyword(DockerKeyword): + NAME: ClassVar[str] = "FROM" + image_ref: str = "" + alias: str = "" + + @classmethod + def parse(cls, args_text: str) -> FromKeyword: + raw = args_text.strip() + if not raw: + raise ValueError("FROM requires an image reference") + parts = raw.split() + if len(parts) == 1: + return cls(image_ref=parts[0], alias="") + if len(parts) == 3 and parts[1].upper() == "AS": + return cls(image_ref=parts[0], alias=parts[2]) + raise ValueError(f"invalid FROM syntax: {raw!r}") + + def serialize(self) -> str: + return f"FROM {self.image_ref}" + (f" AS {self.alias}" if self.alias else "") + + def execute(self, ctx: BuildContext) -> None: + ref = 
def wait_import(ctx: BuildContext, op_uuid: str, tag: str) -> str:
    """Poll an image-import operation until it finishes; return the image.

    The operation is fetched from ``/v1/operations/{op_uuid}`` after each
    sleep. The sleep starts at one second and doubles up to a five-second
    ceiling.

    Args:
        ctx: Build context whose ``client`` performs the GET requests.
        op_uuid: UUID of the import operation to watch.
        tag: Tag being imported; used only in error messages.

    Returns:
        The ``image`` value from the operation's ``result``, as a string.

    Raises:
        RuntimeError: if the operation ends in a status other than
            ``SUCCESS``, or succeeds without reporting an image.
    """
    delay = 1.0
    max_delay = 5.0
    while True:
        time.sleep(delay)
        resp = ctx.client.get(f"/v1/operations/{op_uuid}")
        op = json.loads(resp.read())
        if op["status"] in TERMINAL_STATUSES:
            break
        # Clamp while doubling: ``delay += delay`` guarded only by
        # ``delay < 5`` jumps from 4 s straight to 8 s and stays there.
        delay = min(delay * 2, max_delay)
    if op["status"] != "SUCCESS":
        raise RuntimeError(
            f"image import {tag!r} ended with {op['status']}"
            + (f": {op.get('error', '')}" if op.get("error") else "")
        )
    result = op.get("result") or {}
    image = result.get("image")
    if not image:
        raise RuntimeError(f"image import {tag!r} returned no image")
    return str(image)
def serialize(self) -> str:
    """Render the directive as ``RUN <shell string>`` or ``RUN <JSON array>``.

    Shell form emits the first part verbatim; exec form emits all parts
    as a JSON list.
    """
    rendered = self.parts[0] if self.shell_form else json.dumps(list(self.parts))
    return f"RUN {rendered}"
def poll(ctx: BuildContext, op_uuid: str) -> dict[str, object]:
    """Poll ``/v1/operations/{op_uuid}`` until it reaches a terminal status.

    Sleeps before every request. The sleep starts at half a second and
    doubles up to a five-second ceiling.

    Args:
        ctx: Build context whose ``client`` performs the GET requests.
        op_uuid: UUID of the operation to watch.

    Returns:
        The decoded operation document whose ``status`` is in
        ``TERMINAL_STATUSES``.
    """
    delay = 0.5
    max_delay = 5.0
    while True:
        time.sleep(delay)
        resp = ctx.client.get(f"/v1/operations/{op_uuid}")
        op = json.loads(resp.read())
        if op["status"] in TERMINAL_STATUSES:
            return op  # type: ignore[no-any-return]
        # Clamp while doubling: ``delay += delay`` guarded only by
        # ``delay < 5`` jumps from 4 s straight to 8 s and stays there.
        delay = min(delay * 2, max_delay)
def build_command(
    parts: tuple[str, ...],
    shell_form: bool,
    user: str,
) -> tuple[str, list[str], bool]:
    """Translate parsed RUN parts (and an optional USER) into ``(command, args, shell)``.

    Shell form always yields a shell command with no args. Exec form
    yields the program plus its argument list with ``shell=False``,
    unless a user is set: then the parts are shell-quoted, joined, and
    wrapped via ``wrap_with_user``, which forces ``shell=True``.
    """
    if shell_form:
        script = parts[0]
    else:
        program, arguments = parts[0], list(parts[1:])
        if not user:
            return program, arguments, False
        # Running as another user goes through a shell wrapper, so the
        # argv has to be flattened into one safely quoted string first.
        script = shlex.join([program, *arguments])

    command = wrap_with_user(script, user) if user else script
    return command, [], True
@dataclass(frozen=True)
class WorkdirKeyword(DockerKeyword):
    """``WORKDIR`` directive: updates ``ctx.workdir`` for later directives."""

    NAME: ClassVar[str] = "WORKDIR"
    path: str = ""

    @classmethod
    def parse(cls, args_text: str) -> WorkdirKeyword:
        """Build the keyword from the text after ``WORKDIR``; empty text is an error."""
        stripped = args_text.strip()
        if stripped:
            return cls(path=stripped)
        raise ValueError("WORKDIR requires a path")

    def serialize(self) -> str:
        """Render the directive back to ``WORKDIR <path>``."""
        return "WORKDIR " + self.path

    def execute(self, ctx: BuildContext) -> None:
        """Resolve the substituted path against the current workdir and store it."""
        target = ctx.substitute(self.path)
        # posixpath.join discards the base when ``target`` is absolute, so a
        # single expression covers both the absolute and the relative case.
        ctx.workdir = posixpath.normpath(posixpath.join(ctx.workdir or "/", target))
def collect(
    self,
    sources: tuple[str, ...],
    dest: str,
    *,
    uid: int,
    gid: int,
    mode_override: int | None,
) -> list[MappedFile]:
    """Walk every source, return ``MappedFile`` rows for upload.

    Args:
        sources: Source specs, interpreted relative to ``self.root``.
        dest: Destination path passed through to ``self.walk``.
        uid: Owner id passed through to ``self.walk``.
        gid: Group id passed through to ``self.walk``.
        mode_override: Mode passed through to ``self.walk``; may be ``None``.

    Returns:
        The concatenated results of ``self.walk`` for each source, in
        source order.

    Raises:
        ValueError: if a source resolves to a path outside ``self.root``.
    """
    mapped: list[MappedFile] = []
    for src in sources:
        host_path = (self.root / src).resolve()
        # Compare path components, not raw strings: a string-prefix test
        # would accept a sibling such as "/ctx-evil" for the root "/ctx".
        if not host_path.is_relative_to(self.root):
            raise ValueError(f"COPY/ADD source escapes context: {src!r}")
        mapped.extend(self.walk(host_path, dest, sources, uid, gid, mode_override))
    return mapped
def matches_default_excludes(rel_path: str) -> bool:
    """Return True if ``rel_path`` or any ``/``-separated part of it matches a default exclude."""
    # The whole path is tried first, then each individual component.
    candidates = (rel_path, *rel_path.split("/"))
    return any(
        fnmatch.fnmatch(candidate, pattern)
        for pattern in DEFAULT_FILE_EXCLUDES
        for candidate in candidates
    )
def join_continuations(text: str) -> list[str]:
    """Merge lines ending with ``\\`` into single logical lines.

    Each physical line is right-stripped. Lines ending in a backslash are
    buffered (without the backslash) and joined with the next
    non-continued line, using a single space between the stripped
    fragments. Comment lines found inside a continued instruction are
    dropped instead of terminating it. Lines outside a continuation,
    including comments and blanks, are returned unchanged for the caller
    to filter.

    Args:
        text: Raw Dockerfile contents.

    Returns:
        The logical lines in file order.
    """
    out: list[str] = []
    buf: list[str] = []
    for raw_line in text.splitlines():
        line = raw_line.rstrip()
        # A comment in the middle of a continued instruction is not part of
        # it; skipping keeps the following lines attached to the instruction.
        if buf and line.lstrip().startswith("#"):
            continue
        if line.endswith("\\"):
            buf.append(line[:-1])
            continue
        if buf:
            buf.append(line)
            out.append(" ".join(s.strip() for s in buf))
            buf = []
        else:
            out.append(line)
    if buf:
        # The file ended while still inside a continuation; flush what we have.
        out.append(" ".join(s.strip() for s in buf))
    return out
sessions? Run `contree session --help` or `contree agent sessions` - `use`: bind the session to an image or reusable tag. - `run`: execute a command in the current session image. +- `build`: interpret a `Dockerfile` and produce a tagged image, reusing + cached layers per context directory. Prefer this over hand-running + each Dockerfile step when one already exists. - `ls` / `cat`: inspect files from the image without spawning a VM. - `cp`: download a file from the image to the host. - `file edit`: open a remote file in a host editor and stage it for the next run. @@ -432,6 +435,34 @@ contree -S agent_task_nim cp /work/project/main ./results/nim/ Each subagent works in complete isolation. The parent agent collects `./results//` after all subagents finish. +## Building from a Dockerfile + +When a repo already has a `Dockerfile`, do not reproduce each step by +hand. Run `contree build` instead: + +```bash +contree build . --tag myapp:dev +contree build ./app --dockerfile ./app/Dockerfile.prod --tag svc:prod +contree build . --build-arg VERSION=1.2 +contree build . --no-cache +``` + +- Cache is keyed by `abspath(CONTEXT)`. Same context + same Dockerfile + + same build args = full layer cache hit on re-runs. +- Supported directives: `FROM`, `RUN`, `COPY`, `ADD` (local paths + only), `WORKDIR`, `ENV`, `ARG`, `USER`. `CMD`/`ENTRYPOINT`/`LABEL` + /`EXPOSE`/`VOLUME`/`STOPSIGNAL`/`MAINTAINER`/`HEALTHCHECK`/`ONBUILD` + /`SHELL` are parsed but skipped with a warning. +- Multi-stage (`FROM ... AS x`, `COPY --from=x`) is not yet supported; + use a single linear pipeline for now. +- `/.dockerignore` filters `COPY`/`ADD` walks. Globs `*` / + `**` / `?` / `[abc]` work; trailing `/` matches a directory and + everything below it; lines starting with `!` re-include. +- Tag the resulting image with `--tag NAME[:TAG]` to make it + reusable. + +Use `contree build --help` for the full flag list. 
def make_op_success(image: str, op_uuid: str = "op-1") -> FakeResponse:
    """Build a fake response for a successful instance operation that produced ``image``."""
    instance_result = {
        "state": {"exit_code": 0},
        "stdout": None,
        "stderr": None,
    }
    operation = {
        "uuid": op_uuid,
        "kind": "instance",
        "status": "SUCCESS",
        "duration": 1.0,
        "metadata": {"result": instance_result},
        "result": {"image": image, "tag": ""},
    }
    return FakeResponse.json(operation)
def write_dockerfile(d: Path, text: str) -> Path:
    """Write ``text`` to ``d / "Dockerfile"`` and return that path."""
    target = d.joinpath("Dockerfile")
    target.write_text(text)
    return target
class TestSimpleBuild:
    """End-to-end checks for a minimal ``FROM`` + ``RUN`` Dockerfile."""

    def test_from_run_creates_two_api_calls(self, context_dir, db_path):
        # Three requests are asserted in total: the tag lookup for FROM,
        # then the spawn and the poll for RUN.
        write_dockerfile(
            context_dir,
            "FROM tag:ubuntu:latest\nRUN echo hi\n",
        )
        client = ContreeTestClient()
        build_args = BuildArgs(context=str(context_dir))
        queued = [
            make_tag_lookup(BASE_IMG),
            make_spawn(),
            make_op_success(NEW_IMG),
        ]
        outcome = run_build(client, build_args, queued, db_path)
        assert outcome is None
        assert client.request_count == 3
        expected_calls = (
            ("GET", "/v1/images"),
            ("POST", "/v1/instances"),
            ("GET", "/v1/operations"),
        )
        for index, (method, fragment) in enumerate(expected_calls):
            assert client.get_request(index).method == method
            assert fragment in client.get_request(index).path

    def test_run_payload_carries_command(self, context_dir, db_path):
        write_dockerfile(
            context_dir,
            "FROM tag:ubuntu:latest\nRUN apt-get update\n",
        )
        client = ContreeTestClient()
        build_args = BuildArgs(context=str(context_dir))
        queued = [
            make_tag_lookup(BASE_IMG),
            make_spawn(),
            make_op_success(NEW_IMG),
        ]
        run_build(client, build_args, queued, db_path)
        # Request 1 is the POST that spawns the instance.
        payload = json.loads(client.get_request(1).body.decode())
        assert payload["image"] == BASE_IMG
        assert payload["command"] == "apt-get update"
        assert payload["shell"] is True
class TestUnsupportedDirective:
    """Recognised-but-unimplemented directives must not fail the build."""

    def test_label_skipped_with_warning(self, context_dir, db_path, caplog):
        write_dockerfile(
            context_dir,
            "FROM tag:ubuntu:latest\nLABEL maintainer=me\nRUN echo hi\n",
        )
        client = ContreeTestClient()
        queued = [
            make_tag_lookup(BASE_IMG),
            make_spawn(),
            make_op_success(NEW_IMG),
        ]
        outcome = run_build(
            client,
            BuildArgs(context=str(context_dir)),
            queued,
            db_path,
        )
        assert outcome is None
        # At least one captured log record has to mention the skip.
        messages = [record.message for record in caplog.records]
        assert any("not supported" in message for message in messages)
class TestSessionKey:
    """``make_session_key`` must be stable per path and distinct across paths."""

    def test_deterministic(self, tmp_path):
        first, second = (make_session_key(tmp_path / "p") for _ in range(2))
        assert first == second
        assert first.startswith("build:")

    def test_differs_by_path(self, tmp_path):
        key_for_a = make_session_key(tmp_path / "a")
        key_for_b = make_session_key(tmp_path / "b")
        assert key_for_a != key_for_b
class TestParseRun:
    """Parsing of ``RUN`` in shell form and JSON exec form."""

    def test_shell_form(self):
        expected = RunKeyword(
            parts=("apt-get update && apt-get install -y curl",),
            shell_form=True,
        )
        parsed = parse_dockerfile("RUN apt-get update && apt-get install -y curl")
        assert parsed == [expected]

    def test_exec_form(self):
        expected = RunKeyword(parts=("echo", "hi"), shell_form=False)
        parsed = parse_dockerfile('RUN ["echo", "hi"]')
        assert parsed == [expected]

    def test_invalid_exec_form(self):
        # A bracketed argument that is not valid JSON must be rejected.
        with pytest.raises(ValueError):
            parse_dockerfile("RUN [not json]")
class TestParseWorkdirUser:
    """Parsing of the single-argument ``WORKDIR`` and ``USER`` directives."""

    def test_workdir(self):
        expected = [WorkdirKeyword(path="/app")]
        assert parse_dockerfile("WORKDIR /app") == expected

    def test_user(self):
        expected = [UserKeyword(spec="nobody")]
        assert parse_dockerfile("USER nobody") == expected
test_line_continuation_joins(self): + d = parse_dockerfile( + "RUN apt-get update && \\\n apt-get install -y \\\n curl" + ) + run = d[0] + assert isinstance(run, RunKeyword) + assert "apt-get update" in run.parts[0] + assert "install -y" in run.parts[0] + assert "curl" in run.parts[0] diff --git a/tests/test_dockerignore.py b/tests/test_dockerignore.py new file mode 100644 index 0000000..aa13027 --- /dev/null +++ b/tests/test_dockerignore.py @@ -0,0 +1,117 @@ +from __future__ import annotations + +import pytest + +from contree_cli.docker.dockerignore import ( + is_ignored, + parse_dockerignore, + pattern_to_regex, +) +from contree_cli.docker.local_context import LocalContext + + +class TestPatternToRegex: + @pytest.mark.parametrize( + "pattern,subject,expected", + [ + ("foo", "foo", True), + ("foo", "bar", False), + ("foo", "foo/bar", False), # bare pattern, no subpath match + ("foo/", "foo/bar", True), + ("foo/", "foo", True), # trailing slash also matches the bare dir name + ("*.log", "x.log", True), + ("*.log", "sub/x.log", False), # * does not cross / + ("**/*.log", "x.log", True), + ("**/*.log", "a/x.log", True), + ("**/*.log", "a/b/x.log", True), + ("src/**", "src/a/b.c", True), + ("src/**", "src/a.txt", True), + ("a/**/b", "a/b", True), + ("a/**/b", "a/x/b", True), + ("a/**/b", "a/x/y/b", True), + ("a?b", "axb", True), + ("a?b", "ab", False), + ("file[12]", "file1", True), + ("file[12]", "file3", False), + ], + ) + def test_matches(self, pattern, subject, expected): + import re + + regex = pattern_to_regex(pattern) + assert bool(re.fullmatch(regex, subject)) is expected + + +class TestParseDockerignore: + def test_missing_file_returns_empty(self, tmp_path): + assert parse_dockerignore(tmp_path) == () + + def test_comments_and_blank_lines_skipped(self, tmp_path): + (tmp_path / ".dockerignore").write_text( + "# header\n\nfoo\n # indented comment kept as literal? 
no\n!bar\n" + ) + rules = parse_dockerignore(tmp_path) + # " # indented" is not a comment in Docker; we follow simple lstrip + # check then startswith #. After strip, it becomes "# indented comment..." + # which IS treated as a comment. Verify behaviour. + assert len(rules) == 2 + assert rules[0].raw.strip() == "foo" + assert rules[1].negate is True + + def test_negation_then_match(self, tmp_path): + (tmp_path / ".dockerignore").write_text("*.log\n!keep.log\n") + rules = parse_dockerignore(tmp_path) + assert is_ignored("x.log", rules) is True + assert is_ignored("keep.log", rules) is False + + def test_match_order_last_wins(self, tmp_path): + (tmp_path / ".dockerignore").write_text("!keep.log\n*.log\n") + rules = parse_dockerignore(tmp_path) + # *.log comes after !keep.log so keep.log gets re-ignored + assert is_ignored("keep.log", rules) is True + + +class TestLocalContextIgnore: + def test_dockerignore_filters_dir_walk(self, tmp_path): + (tmp_path / "src").mkdir() + (tmp_path / "src" / "app.py").write_text("ok") + (tmp_path / "src" / "ignore.log").write_text("nope") + (tmp_path / ".dockerignore").write_text("**/*.log\n") + + local = LocalContext.from_dir(tmp_path) + mapped = local.collect(("src",), "/app", uid=0, gid=0, mode_override=None) + paths = sorted(m.instance_path for m in mapped) + assert paths == ["/app/app.py"] + + def test_dockerignore_blocks_file_source(self, tmp_path): + (tmp_path / "secret.env").write_text("token=hi") + (tmp_path / ".dockerignore").write_text("*.env\n") + + local = LocalContext.from_dir(tmp_path) + mapped = local.collect( + ("secret.env",), "/app.env", uid=0, gid=0, mode_override=None + ) + assert mapped == [] + + def test_default_excludes_still_apply(self, tmp_path): + (tmp_path / ".git").mkdir() + (tmp_path / ".git" / "config").write_text("x") + (tmp_path / "app.py").write_text("ok") + + local = LocalContext.from_dir(tmp_path) + mapped = local.collect((".",), "/app/", uid=0, gid=0, mode_override=None) + paths = 
sorted(m.instance_path for m in mapped) + # .git filtered out by DEFAULT_FILE_EXCLUDES + assert all(".git" not in p for p in paths) + assert "/app/app.py" in paths + + def test_negation_reincludes(self, tmp_path): + (tmp_path / "logs").mkdir() + (tmp_path / "logs" / "keep.log").write_text("k") + (tmp_path / "logs" / "junk.log").write_text("j") + (tmp_path / ".dockerignore").write_text("logs/*.log\n!logs/keep.log\n") + + local = LocalContext.from_dir(tmp_path) + mapped = local.collect(("logs",), "/dest", uid=0, gid=0, mode_override=None) + paths = sorted(m.instance_path for m in mapped) + assert paths == ["/dest/keep.log"] diff --git a/tests/test_run.py b/tests/test_run.py index 720da2e..11d933e 100644 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -427,7 +427,7 @@ def test_file_upload(self, contree_client, session_store, tmp_path): # Verify dedup check then file upload req0 = contree_client.get_request(0) assert req0.method == "GET" - assert "/v1/files?sha256=" in req0.path + assert "/v1/files/" in req0.path req1 = contree_client.get_request(1) assert req1.method == "POST" assert "/v1/files" in req1.path @@ -466,7 +466,7 @@ def test_file_uuid_in_spawn_payload(self, contree_client, session_store, tmp_pat assert body["files"]["/app/script.sh"]["uid"] == 1000 def test_file_dedup_skips_upload(self, contree_client, session_store, tmp_path): - """GET /v1/files?sha256=... returns 200 -> no POST upload, UUID reused.""" + """GET /v1/files/... 
returns 200 -> no POST upload, UUID reused.""" session_store.set_image(IMG_UUID, kind="test") host_file = tmp_path / "data.txt" host_file.write_text("content") @@ -491,7 +491,7 @@ def test_file_dedup_skips_upload(self, contree_client, session_store, tmp_path): # Only GET (dedup), POST (spawn), GET (poll) -- no POST /v1/files req0 = contree_client.get_request(0) assert req0.method == "GET" - assert "/v1/files?sha256=" in req0.path + assert "/v1/files/" in req0.path assert methods.count("POST") == 1 # only the spawn POST # Spawn uses the existing UUID @@ -522,7 +522,7 @@ def test_file_dedup_logs_reuse( ] with caplog.at_level(logging.INFO): _run_cmd(contree_client, args, responses, store=session_store) - assert "Uploaded file:" in caplog.text + assert "File reused:" in caplog.text assert "existing-uuid" in caplog.text def test_file_dedup_non_404_raises(self, contree_client, session_store, tmp_path): @@ -613,7 +613,7 @@ def test_local_file_cache_invalidated_when_file_changes( req0 = contree_client.get_request(0) assert req0.method == "GET" - assert "/v1/files?sha256=" in req0.path + assert "/v1/files/" in req0.path spawn_req = contree_client.get_request(1) spawn_body = json.loads(spawn_req.body) assert spawn_body["files"]["/app/cached-change.txt"]["uuid"] == "new-uuid" From 353902195eefda5cd03e2665f97bd12da38aacd9 Mon Sep 17 00:00:00 2001 From: Dmitry Orlov Date: Tue, 12 May 2026 23:12:43 +0200 Subject: [PATCH 5/9] add contree build --- docs/commands/build.md | 180 +++++++++++++++++++++++ docs/examples/build-demo/Dockerfile | 12 ++ docs/examples/build-demo/hello.py | 11 ++ docs/examples/build-demo/src/__init__.py | 0 docs/examples/build-demo/src/banner.py | 11 ++ 5 files changed, 214 insertions(+) create mode 100644 docs/commands/build.md create mode 100644 docs/examples/build-demo/Dockerfile create mode 100644 docs/examples/build-demo/hello.py create mode 100644 docs/examples/build-demo/src/__init__.py create mode 100644 docs/examples/build-demo/src/banner.py diff 
--git a/docs/commands/build.md b/docs/commands/build.md new file mode 100644 index 0000000..754c155 --- /dev/null +++ b/docs/commands/build.md @@ -0,0 +1,180 @@ +% build command reference for the Docker-style Dockerfile interpreter +# build + +Build an image from a `Dockerfile`. Each directive runs against the +contree API and produces a new image layer; successful layers are +materialised as branches named `layer:` so re-running the +same Dockerfile reuses prior work. + +## Synopsis + +```bash +contree build [CONTEXT] [--dockerfile PATH] [--tag NAME[:TAG]] + [--build-arg K=V ...] [--no-cache] [--timeout SEC] +``` + +- `CONTEXT` -- build context directory (default `.`). +- `--dockerfile PATH` -- override the default `/Dockerfile`. +- `--tag NAME[:TAG]` -- tag the final image via `PATCH /v1/images/{uuid}/tag`. +- `--build-arg KEY=VALUE` -- supply a value for an `ARG` declared in the + Dockerfile (repeatable). +- `--no-cache` -- ignore existing `layer:` branches and rebuild. +- `--timeout SEC` -- per-`RUN` operation timeout in seconds (default 600). + +## Help output + +```{terminal-shell} contree build --help +``` + +## Examples + +```bash +# Simplest build; finds ./Dockerfile, tags the result +contree build . --tag myapp:dev + +# Out-of-tree Dockerfile +contree build ./service --dockerfile ./service/Dockerfile.prod --tag svc:prod + +# Override build-time variables +contree build . --build-arg VERSION=2.5 --build-arg DEBUG=1 + +# Force a rebuild ignoring cached layers +contree build . --no-cache --tag myapp:dev +``` + +## Supported directives (MVP) + +| Directive | Behaviour | +|-----------|-----------| +| `FROM ref[:tag] [AS name]` | Resolves the base image. If the tag is not found locally, the build auto-imports it via `POST /v1/images/import`. `AS name` is parsed but ignored (multi-stage is Phase 2). | +| `RUN ...` | Shell-form (`RUN echo hi`) or JSON exec-form (`RUN ["echo","hi"]`). 
Spawns `POST /v1/instances`, polls until terminal status, captures the resulting image. | +| `COPY [--chown=...] [--chmod=...] SRC... DEST` | Walks local sources relative to the build context, applies `.dockerignore`, uploads files (with SHA256 dedup), and stages them for the next `RUN`. | +| `ADD ...` | Same as `COPY` for local files; URL/tar inputs emit a warning and are skipped. | +| `WORKDIR /path` | Sets the working directory for subsequent directives. | +| `ENV KEY=VALUE ...` | Accumulates environment variables passed to every `RUN`. | +| `ARG NAME[=DEFAULT]` | Declares a build-time variable. Overridden by `--build-arg`. | +| `USER name` | Subsequent `RUN` commands are wrapped in `su -s /bin/sh -c '' `. | +| `CMD`, `ENTRYPOINT`, `LABEL`, `EXPOSE`, `VOLUME`, `STOPSIGNAL`, `MAINTAINER`, `HEALTHCHECK`, `ONBUILD`, `SHELL` | Parsed but skipped with a warning. | + +`COPY --from=stage` is a Phase 2 feature; in MVP it warns and skips. + +## Sessions and layer cache + +Builds run in a dedicated session keyed by the absolute path of the +context directory: `build:`. Re-running the +same Dockerfile in the same context reuses cached layers across +invocations of `contree build`; switching to `--no-cache` rebuilds +everything. + +Layers are stored as branches whose names are the chain-hash of: + +``` +sha256(parent_layer_hash || state(workdir/env/user/args) || directive || pending_files) +``` + +To inspect the resulting branches: + +```bash +contree session list --filter build: +contree session show +``` + +## `.dockerignore` + +`contree build` reads `/.dockerignore` and filters every +`COPY`/`ADD` walk. Rules are matched in order against POSIX-style +paths relative to the context root; the last matching rule wins, +so `!` re-includes a previously ignored path. + +``` +# .dockerignore +**/*.log +.env* +node_modules +!logs/keep.log +``` + +Globs: +- `*` matches a single path segment (does not cross `/`). +- `**` matches zero or more path components. 
+- `?` matches one character. +- `[abc]` is a character class. +- Trailing `/` matches a directory and everything below it. + +The default exclude list from `run --file` (`.git`, `*.pyc`, +`__pycache__`, `.venv`, `node_modules`, `dist`, `build`, etc.) is +always applied on top of `.dockerignore`. + +## Variable substitution + +`$VAR` and `${VAR}` are expanded in `FROM`, `RUN`, `COPY`/`ADD` +arguments, `WORKDIR`, `ENV` values, and `USER`. The value source is: + +1. `--build-arg KEY=VALUE` (highest priority for declared `ARG` names). +2. `ENV` directives processed so far. +3. `ARG` defaults. +4. Empty string for unknown names. + +## End-to-end demo + +A small example lives in `docs/examples/build-demo/`. The Dockerfile +exercises `FROM`, `ARG`, `ENV`, `WORKDIR`, two `COPY` directives (file +and directory), and two `RUN` directives. A `.dockerignore` filters +log files and `__pycache__` from the upload. + +```dockerfile +% docs/examples/build-demo/Dockerfile +FROM python:3.12-alpine + +ARG GREETING=hello +ENV APP_GREETING=${GREETING} + +WORKDIR /app + +COPY hello.py /app/hello.py +COPY src /app/src + +RUN python -c "import sys; print('python', sys.version)" +RUN python /app/hello.py +``` + +```dockerfile +% docs/examples/build-demo/.dockerignore +**/*.log +**/__pycache__ +.env* +``` + +Build and tag it: + +```bash +contree build docs/examples/build-demo --tag contree-cli-build-demo:latest +``` + +Expected output (truncated): + +```text +[INFO] RUN spawned op=019e... RUN python -c "import sys; print('python', sys.version)" +[INFO] stdout: +python 3.12.13 ... + +[INFO] RUN spawned op=019e... RUN python /app/hello.py +[INFO] stdout: ++---------------+ +| hello | +| contree build | ++---------------+ + +[INFO] tagged as contree-cli-build-demo:latest +IMAGE TAG SESSION + contree-cli-build-demo:latest build: +``` + +Re-running the same command without `--no-cache` produces three layer +cache hits and no API instance spawns. 
+ +## See also + +- {doc}`/commands/run` -- the single-shot version of what `RUN` does. +- {doc}`/commands/session` -- inspect or branch the layer history. +- {doc}`/commands/images` -- list, import, and tag images directly. diff --git a/docs/examples/build-demo/Dockerfile b/docs/examples/build-demo/Dockerfile new file mode 100644 index 0000000..43d85e6 --- /dev/null +++ b/docs/examples/build-demo/Dockerfile @@ -0,0 +1,12 @@ +FROM python:3.12-alpine + +ARG GREETING=hello +ENV APP_GREETING=${GREETING} + +WORKDIR /app + +COPY hello.py /app/hello.py +COPY src /app/src + +RUN python -c "import sys; print('python', sys.version)" +RUN python /app/hello.py diff --git a/docs/examples/build-demo/hello.py b/docs/examples/build-demo/hello.py new file mode 100644 index 0000000..4c16569 --- /dev/null +++ b/docs/examples/build-demo/hello.py @@ -0,0 +1,11 @@ +import os + +from src import banner + + +def main() -> None: + print(banner.box(os.environ.get("APP_GREETING", "hi"), "contree build")) + + +if __name__ == "__main__": + main() diff --git a/docs/examples/build-demo/src/__init__.py b/docs/examples/build-demo/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/docs/examples/build-demo/src/banner.py b/docs/examples/build-demo/src/banner.py new file mode 100644 index 0000000..d3850ed --- /dev/null +++ b/docs/examples/build-demo/src/banner.py @@ -0,0 +1,11 @@ +def box(top: str, bottom: str) -> str: + width = max(len(top), len(bottom)) + 4 + line = "+" + "-" * (width - 2) + "+" + return "\n".join( + [ + line, + f"| {top.center(width - 4)} |", + f"| {bottom.center(width - 4)} |", + line, + ] + ) From b1dec671623d1a3c0139b9e491d65db6d50ce67d Mon Sep 17 00:00:00 2001 From: Dmitry Orlov Date: Tue, 12 May 2026 23:21:38 +0200 Subject: [PATCH 6/9] file list --- contree_cli/agent.md | 24 ++++++ contree_cli/cli/file.py | 150 +++++++++++++++++++++++++++++++++++++- contree_cli/cli/run.py | 4 + contree_cli/session.py | 22 ++++++ contree_cli/skill_body.md | 11 +++ 
docs/commands/file.md | 33 +++++++++ tests/test_file_cmd.py | 83 ++++++++++++++++++++- tests/test_session.py | 16 ++++ 8 files changed, 340 insertions(+), 3 deletions(-) diff --git a/contree_cli/agent.md b/contree_cli/agent.md index ce8026e..0f6da54 100644 --- a/contree_cli/agent.md +++ b/contree_cli/agent.md @@ -198,6 +198,29 @@ Pending files are injected into the next non-disposable run. Explicit --file takes priority over pending files at same path. Pending files are branch-aware. +Listing uploaded files: + contree file ls list all uploaded files in the project + contree file ls --since 1d narrow by upload time + contree file ls -q uuid + sha256 + local_path only (quiet) + contree -f json file ls JSON output for jq + + Output joins remote files (uuid, sha256, size, created_at) with the + local upload cache: when the CLI uploaded the file from this host + it shows the absolute LOCAL_PATH alongside the remote UUID. + + IMPORTANT: LOCAL_PATH is resolved ONLY for files uploaded from this + specific machine. The path-to-uuid mapping lives in the local SQLite + cache (per profile, under $CONTREE_HOME/cli/sessions/.db) + keyed by path+inode+mtime+size, and is NOT shared between hosts. + Rows show empty LOCAL_PATH when: + - the file was uploaded from a different machine or by a teammate; + - the local file has been moved, renamed, or its inode/mtime/size + changed since upload (the cache key no longer matches); + - the upload happened before path tracking landed (older entries + backfill on the next match). + An agent must not assume LOCAL_PATH is authoritative across hosts; + for cross-machine identity always use the remote UUID or sha256. + More: contree run --help, contree file --help Execution modes @@ -411,6 +434,7 @@ All commands env [KEY=VALUE ...] 
Session env vars (-d to unset) file edit PATH Edit remote file via $EDITOR file cp SRC DEST Stage local file for next run + file ls [-q] List uploaded files + local path (aliases: list) session list List sessions (aliases: ls) session branch [NAME] Create/list branches (aliases: br) session checkout BRANCH Switch branch (aliases: co) diff --git a/contree_cli/cli/file.py b/contree_cli/cli/file.py index 165be63..a17565d 100644 --- a/contree_cli/cli/file.py +++ b/contree_cli/cli/file.py @@ -23,13 +23,26 @@ import subprocess import tempfile from dataclasses import dataclass +from datetime import datetime from pathlib import Path -from contree_cli import CLIENT, SESSION_STORE, ArgumentsProtocol, SetupResult +from contree_cli import ( + CLIENT, + FORMATTER, + SESSION_STORE, + ArgumentsProtocol, + SetupResult, +) from contree_cli.client import ApiError, ContreeClient, resolve_image, stream_response from contree_cli.config import EDITOR from contree_cli.session import SessionStore -from contree_cli.types import FLAGS +from contree_cli.types import ( + FLAGS, + isoformat_datetime, + parse_datetime, + parse_interval, + positive_int, +) logger = logging.getLogger(__name__) @@ -61,6 +74,27 @@ def from_args(cls, ns: argparse.Namespace) -> FileCpArgs: return cls(src=ns.src, dest=ns.dest) +FILE_LIST_LIMIT_DEFAULT = 1000 +FILE_LIST_PAGE_SIZE = 1000 + + +@dataclass(frozen=True) +class FileListArgs(ArgumentsProtocol): + since: datetime | None = None + until: datetime | None = None + limit: int = FILE_LIST_LIMIT_DEFAULT + quiet: bool = False + + @classmethod + def from_args(cls, ns: argparse.Namespace) -> FileListArgs: + return cls( + since=getattr(ns, "since", None), + until=getattr(ns, "until", None), + limit=getattr(ns, "limit", FILE_LIST_LIMIT_DEFAULT), + quiet=bool(getattr(ns, "quiet", False)), + ) + + def setup_parser(p: argparse.ArgumentParser) -> SetupResult: sub = p.add_subparsers(dest="file_action", required=True) edit_p = sub.add_parser( @@ -102,6 +136,56 @@ def 
setup_parser(p: argparse.ArgumentParser) -> SetupResult: cp_p.add_argument("dest", help="Destination path inside image") cp_p.set_defaults(handler=cmd_file_cp, load_args=FileCpArgs) + ls_p = sub.add_parser( + "ls", + aliases=["list"], + help="List uploaded files (joined with local cache)", + description=( + "List remote files uploaded to the project and, when present in" + " the local upload cache, show the host path that produced them.\n" + "\n" + "local_path is THIS-MACHINE ONLY: the mapping lives in the local" + " CLI cache ($CONTREE_HOME/cli/sessions/.db) keyed by" + " path+inode+mtime+size and is never synced. Files uploaded from" + " a different host, by a teammate, or before path tracking landed" + " will show an empty local_path -- that is expected, not a bug." + " Use the remote uuid or sha256 for cross-machine identity." + ), + epilog=( + "examples:\n" + " contree file ls\n" + " contree file ls --since 1d\n" + " contree file ls --limit 5000\n" + " contree file ls -q # uuid + sha256 + local_path\n" + " contree -f json file ls\n" + ), + ) + ls_p.add_argument( + *FLAGS["since"], + type=parse_interval, + help=parse_interval.__doc__, + ) + ls_p.add_argument( + *FLAGS["until"], + type=parse_interval, + help="Show files before. " + str(parse_interval.__doc__), + ) + ls_p.add_argument( + *FLAGS["limit"], + type=positive_int, + default=FILE_LIST_LIMIT_DEFAULT, + help="Stop after this many files and warn if more are available", + ) + ls_p.add_argument( + *FLAGS["quiet"], + action="store_true", + help=( + "Emit only uuid, sha256, and local_path columns. local_path is" + " populated only for files uploaded from this very machine." 
+ ), + ) + ls_p.set_defaults(handler=cmd_file_ls, load_args=FileListArgs) + return cmd_file_edit, FileEditArgs @@ -222,3 +306,65 @@ def cmd_file_cp(args: FileCpArgs) -> int | None: title=f"Change file {args.dest}", ) return None + + +def cmd_file_ls(args: FileListArgs) -> int | None: + client = CLIENT.get() + store = SESSION_STORE.get() + formatter = FORMATTER.get() + + local_paths = store.cache.local_file_paths() + + params: dict[str, str] = {} + if args.since is not None: + params["since"] = isoformat_datetime(args.since) + if args.until is not None: + params["until"] = isoformat_datetime(args.until) + + offset = 0 + emitted = 0 + while emitted < args.limit: + page_size = min(FILE_LIST_PAGE_SIZE, args.limit - emitted) + page = {**params, "offset": str(offset), "limit": str(page_size)} + resp = client.get("/v1/files", params=page) + data = json.loads(resp.read()) + files = data.get("files", []) + if not files: + return None + for entry in files: + uuid_str = entry.get("uuid") + local_path = ( + local_paths.get(uuid_str, "") if isinstance(uuid_str, str) else "" + ) + if args.quiet: + formatter( + uuid=uuid_str, + sha256=entry.get("sha256", ""), + local_path=local_path, + ) + continue + row: dict[str, object] = {} + for key, value in entry.items(): + if isinstance(value, (dict, list)): + continue + if key in {"created_at", "updated_at"} and isinstance(value, str): + value = parse_datetime(value) + row[key] = value + row["local_path"] = local_path + formatter(**row) + emitted += len(files) + if len(files) < page_size: + return None + offset += len(files) + + probe = {**params, "offset": str(offset), "limit": "1"} + resp = client.get("/v1/files", params=probe) + data = json.loads(resp.read()) + if data.get("files"): + formatter.flush() + logger.warning( + "Output truncated at --limit=%d files; more results are" + " available. 
Raise --limit or narrow with --since/--until.", + args.limit, + ) + return None diff --git a/contree_cli/cli/run.py b/contree_cli/cli/run.py index c7a2bd7..0bec97a 100644 --- a/contree_cli/cli/run.py +++ b/contree_cli/cli/run.py @@ -379,6 +379,7 @@ def record_local_uuid(mf: MappedFile, file_uuid: str, store: SessionStore) -> No store.cache[("", cache_kind)] = { "uuid": file_uuid, "uploaded_at": time.time(), + "local_path": os.path.abspath(mf.host_path), } @@ -418,6 +419,9 @@ def upload_files( cached = cached_local_uuid(mf, store) if cached: uploaded[mf.host_path] = cached + # Rewrite the entry so older payloads without local_path are + # backfilled with the current host path. + record_local_uuid(mf, cached, store) else: pending.append(mf) diff --git a/contree_cli/session.py b/contree_cli/session.py index ba7cbcf..1e605e4 100644 --- a/contree_cli/session.py +++ b/contree_cli/session.py @@ -217,6 +217,28 @@ def __len__(self) -> int: assert row is not None return row[0] # type: ignore[no-any-return] + def local_file_paths(self) -> dict[str, str]: + """Map remote file UUID to the host path that uploaded it. + + Reads every ``local_file:*`` cache entry, decodes its JSON + payload, and returns ``{remote_uuid: local_path}`` for entries + that have both fields. Older entries without ``local_path`` + are silently skipped. 
+ """ + cur = self._conn.execute( + "SELECT value FROM image_cache WHERE kind LIKE 'local_file:%'", + ) + result: dict[str, str] = {} + for row in cur.fetchall(): + value = self._decode(row["value"]) + if not isinstance(value, dict): + continue + uuid_str = value.get("uuid") + local_path = value.get("local_path") + if isinstance(uuid_str, str) and isinstance(local_path, str): + result[uuid_str] = local_path + return result + def invalidate_prefix( self, *, diff --git a/contree_cli/skill_body.md b/contree_cli/skill_body.md index 1ac94ef..0f567bc 100644 --- a/contree_cli/skill_body.md +++ b/contree_cli/skill_body.md @@ -167,6 +167,17 @@ Unsure about sessions? Run `contree session --help` or `contree agent sessions` - `cp`: download a file from the image to the host. - `file edit`: open a remote file in a host editor and stage it for the next run. - `file cp`: upload a local file and stage it for the next run. +- `file ls`: list uploaded files; rows uploaded from this host show their + `local_path` so the agent can map a UUID back to a workspace file. Add + `-q` for a tight `uuid sha256 local_path` view. + + **`local_path` is THIS-MACHINE ONLY.** The mapping lives in the local + CLI SQLite cache (`$CONTREE_HOME/cli/sessions/.db`) keyed by + `path + inode + mtime + size`. It is not synced anywhere. Rows uploaded + from a different machine, by another teammate, or before path tracking + landed will show an empty `local_path` — that is expected, not a bug. + When working across hosts, treat the remote `uuid`/`sha256` as the + authoritative identifier and never rely on `local_path` resolving. - `session branch`: create an experimental branch. - `session checkout`: switch active branch. - `session rollback`: move the active branch pointer backward. 
diff --git a/docs/commands/file.md b/docs/commands/file.md index e372e72..c24faa2 100644 --- a/docs/commands/file.md +++ b/docs/commands/file.md @@ -35,6 +35,39 @@ Copies a local file and stages it at the given path inside the image. The file is uploaded immediately but only applied to the sandbox on the next `contree run`. +### `file ls` + +Lists files uploaded to the project (`GET /v1/files`) and joins each row +with the local upload cache so that the host path that produced the file +is shown under `LOCAL_PATH` when known. + +:::{important} +`local_path` resolves **only for files uploaded from this very machine**. +The mapping lives in the local SQLite cache (per-profile, under +`$CONTREE_HOME/cli/sessions/.db`) and is keyed by +`path + inode + mtime + size`. It is **not** synced anywhere, so a row +will show an empty `LOCAL_PATH` whenever: + +- the file was uploaded by a different machine, container, or teammate; +- the file was uploaded by an earlier CLI version that did not yet + store the host path (those entries backfill the next time the file + is matched by the local cache); +- the local file has been moved, renamed, or its inode/mtime/size has + changed since upload (the cache key no longer matches and the + mapping is treated as missing until the next upload). + +There is no way to recover the host path of a file that was uploaded +from another machine — the server stores only `uuid`, `sha256`, `size`, +`created_at`, and `updated_at`. +::: + +```bash +contree file ls +contree file ls --since 1d --limit 200 +contree file ls -q # uuid + sha256 + local_path only +contree -f json file ls | jq 'select(.local_path != "")' +``` + ## Pending files Pending files accumulate until the next `contree run` consumes them. 
diff --git a/tests/test_file_cmd.py b/tests/test_file_cmd.py index 502f6c6..1c3fd10 100644 --- a/tests/test_file_cmd.py +++ b/tests/test_file_cmd.py @@ -10,15 +10,18 @@ import pytest from conftest import ContreeTestClient -from contree_cli import SESSION_STORE +from contree_cli import CLIENT, FORMATTER, SESSION_STORE from contree_cli.cli.file import ( FileCpArgs, FileEditArgs, + FileListArgs, _file_sha256, cmd_file_cp, cmd_file_edit, + cmd_file_ls, ) from contree_cli.client import ApiError +from contree_cli.output import JSONFormatter from contree_cli.session import SessionStore @@ -95,6 +98,84 @@ def fake_editor(cmd: str, *, shell: bool = True) -> int: return rc +def _run_file_ls( + tc: ContreeTestClient, + args: FileListArgs, + responses: list[StreamResponse], + *, + store: SessionStore, +) -> int | None: + tc.fake.responses.extend(responses) + CLIENT.set(tc) + SESSION_STORE.set(store) + FORMATTER.set(JSONFormatter()) + ctx = copy_context() + return ctx.run(cmd_file_ls, args) + + +class TestFileLs: + def test_lists_with_local_path(self, contree_client, session_store, capsys): + session_store.cache[("", "local_file:a")] = { + "uuid": "file-1", + "local_path": "/host/app.py", + } + responses = [ + _api_response( + { + "files": [ + {"uuid": "file-1", "sha256": "abc", "size": 10}, + {"uuid": "file-2", "sha256": "def", "size": 20}, + ] + } + ), + ] + rc = _run_file_ls( + contree_client, + FileListArgs(limit=10), + responses, + store=session_store, + ) + assert rc is None + out = capsys.readouterr().out.splitlines() + rows = [json.loads(line) for line in out] + assert rows[0]["uuid"] == "file-1" + assert rows[0]["local_path"] == "/host/app.py" + assert rows[1]["uuid"] == "file-2" + assert rows[1]["local_path"] == "" + + def test_quiet_emits_three_columns(self, contree_client, session_store, capsys): + session_store.cache[("", "local_file:a")] = { + "uuid": "file-1", + "local_path": "/host/app.py", + } + responses = [ + _api_response( + { + "files": [ + { + "uuid": 
"file-1", + "sha256": "abc", + "size": 10, + "created_at": "2026-05-01T00:00:00Z", + }, + ] + } + ), + ] + _run_file_ls( + contree_client, + FileListArgs(limit=10, quiet=True), + responses, + store=session_store, + ) + out = capsys.readouterr().out.strip() + row = json.loads(out) + assert set(row) == {"uuid", "sha256", "local_path"} + assert row["uuid"] == "file-1" + assert row["sha256"] == "abc" + assert row["local_path"] == "/host/app.py" + + class TestFileSha256: def test_empty_file(self, tmp_path: Path): f = tmp_path / "empty" diff --git a/tests/test_session.py b/tests/test_session.py index 64b99e3..64ffed5 100644 --- a/tests/test_session.py +++ b/tests/test_session.py @@ -771,6 +771,22 @@ def test_persists_across_instances(self, session_store: SessionStore): cache2 = session_store.cache assert cache2["img-1", "files:/"] == ["root"] + def test_local_file_paths_returns_uuid_to_path(self, session_store: SessionStore): + cache = session_store.cache + cache[("", "local_file:a")] = {"uuid": "u-1", "local_path": "/host/a.txt"} + cache[("", "local_file:b")] = {"uuid": "u-2", "local_path": "/host/b.txt"} + cache[("img-x", "files:/etc")] = ["unrelated"] + result = cache.local_file_paths() + assert result == {"u-1": "/host/a.txt", "u-2": "/host/b.txt"} + + def test_local_file_paths_skips_entries_without_local_path( + self, session_store: SessionStore + ): + cache = session_store.cache + cache[("", "local_file:old")] = {"uuid": "u-old"} # no local_path + cache[("", "local_file:new")] = {"uuid": "u-new", "local_path": "/host/x"} + assert cache.local_file_paths() == {"u-new": "/host/x"} + def test_global_image_list(self, session_store: SessionStore): """The image list cache uses empty-string UUID.""" cache = session_store.cache From 24eef7409a3ead496253ca628d099ae301f61f83 Mon Sep 17 00:00:00 2001 From: Dmitry Orlov Date: Wed, 13 May 2026 10:59:15 +0200 Subject: [PATCH 7/9] simple Dockerfile interpreter --- contree_cli/agent.md | 27 +-- contree_cli/cli/file.py | 32 +-- 
contree_cli/docker/context.py | 36 ++-- contree_cli/docker/keyword.py | 12 +- contree_cli/docker/kw_add.py | 106 ++++++++-- contree_cli/docker/kw_arg.py | 7 +- contree_cli/docker/kw_copy.py | 21 +- contree_cli/docker/kw_env.py | 5 +- contree_cli/docker/kw_from.py | 8 +- contree_cli/docker/kw_run.py | 8 +- contree_cli/docker/kw_skipped.py | 5 +- contree_cli/docker/kw_user.py | 5 +- contree_cli/docker/kw_workdir.py | 5 +- contree_cli/docker/url_fetch.py | 303 ++++++++++++++++++++++++++++ contree_cli/session.py | 19 +- contree_cli/skill_body.md | 21 +- docs/commands/build.md | 17 +- docs/commands/file.md | 30 +-- docs/examples/build-demo/Dockerfile | 4 + tests/conftest.py | 13 +- tests/test_build.py | 54 +++++ tests/test_client.py | 2 +- tests/test_file_cmd.py | 26 +-- tests/test_session.py | 19 +- tests/test_url_fetch.py | 205 +++++++++++++++++++ 25 files changed, 863 insertions(+), 127 deletions(-) create mode 100644 contree_cli/docker/url_fetch.py create mode 100644 tests/test_url_fetch.py diff --git a/contree_cli/agent.md b/contree_cli/agent.md index 0f6da54..090b762 100644 --- a/contree_cli/agent.md +++ b/contree_cli/agent.md @@ -201,24 +201,27 @@ Pending files are branch-aware. Listing uploaded files: contree file ls list all uploaded files in the project contree file ls --since 1d narrow by upload time - contree file ls -q uuid + sha256 + local_path only (quiet) + contree file ls -q uuid + sha256 + source only (quiet) contree -f json file ls JSON output for jq Output joins remote files (uuid, sha256, size, created_at) with the - local upload cache: when the CLI uploaded the file from this host - it shows the absolute LOCAL_PATH alongside the remote UUID. - - IMPORTANT: LOCAL_PATH is resolved ONLY for files uploaded from this - specific machine. The path-to-uuid mapping lives in the local SQLite - cache (per profile, under $CONTREE_HOME/cli/sessions/.db) - keyed by path+inode+mtime+size, and is NOT shared between hosts. 
- Rows show empty LOCAL_PATH when: + local upload cache. The SOURCE column shows whatever this machine + used to produce the file: + - absolute host path for files uploaded via `run --file` / `COPY`; + - https://... URL for files fetched via `ADD URL`. + + IMPORTANT: SOURCE is resolved ONLY for files uploaded from this + specific machine. The mapping lives in the local SQLite cache (per + profile, under $CONTREE_HOME/cli/sessions/.db) keyed by + path+inode+mtime+size (for host paths) or by the URL itself (for + URL fetches), and is NOT shared between hosts. Rows show empty + SOURCE when: - the file was uploaded from a different machine or by a teammate; - - the local file has been moved, renamed, or its inode/mtime/size + - the host file has been moved, renamed, or its inode/mtime/size changed since upload (the cache key no longer matches); - - the upload happened before path tracking landed (older entries + - the upload happened before tracking landed (older entries backfill on the next match). - An agent must not assume LOCAL_PATH is authoritative across hosts; + An agent must not assume SOURCE is authoritative across hosts; for cross-machine identity always use the remote UUID or sha256. 
More: contree run --help, contree file --help diff --git a/contree_cli/cli/file.py b/contree_cli/cli/file.py index a17565d..85cd4a6 100644 --- a/contree_cli/cli/file.py +++ b/contree_cli/cli/file.py @@ -142,21 +142,23 @@ def setup_parser(p: argparse.ArgumentParser) -> SetupResult: help="List uploaded files (joined with local cache)", description=( "List remote files uploaded to the project and, when present in" - " the local upload cache, show the host path that produced them.\n" + " the local upload cache, show what produced them under the" + " 'source' column: either an absolute host path (for run --file" + " / COPY uploads) or a URL (for ADD URL).\n" "\n" - "local_path is THIS-MACHINE ONLY: the mapping lives in the local" - " CLI cache ($CONTREE_HOME/cli/sessions/.db) keyed by" - " path+inode+mtime+size and is never synced. Files uploaded from" - " a different host, by a teammate, or before path tracking landed" - " will show an empty local_path -- that is expected, not a bug." - " Use the remote uuid or sha256 for cross-machine identity." + "source is THIS-MACHINE ONLY: the mapping lives in the local" + " CLI cache ($CONTREE_HOME/cli/sessions/.db) and is" + " never synced. Files uploaded from a different host, by a" + " teammate, or before tracking landed will show an empty source" + " -- that is expected, not a bug. Use the remote uuid or sha256" + " for cross-machine identity." ), epilog=( "examples:\n" " contree file ls\n" " contree file ls --since 1d\n" " contree file ls --limit 5000\n" - " contree file ls -q # uuid + sha256 + local_path\n" + " contree file ls -q # uuid + sha256 + source\n" " contree -f json file ls\n" ), ) @@ -180,8 +182,8 @@ def setup_parser(p: argparse.ArgumentParser) -> SetupResult: *FLAGS["quiet"], action="store_true", help=( - "Emit only uuid, sha256, and local_path columns. local_path is" - " populated only for files uploaded from this very machine." + "Emit only uuid, sha256, and source columns. 
source is populated" + " only for files uploaded from this very machine." ), ) ls_p.set_defaults(handler=cmd_file_ls, load_args=FileListArgs) @@ -313,7 +315,7 @@ def cmd_file_ls(args: FileListArgs) -> int | None: store = SESSION_STORE.get() formatter = FORMATTER.get() - local_paths = store.cache.local_file_paths() + sources = store.cache.local_file_paths() params: dict[str, str] = {} if args.since is not None: @@ -333,14 +335,12 @@ def cmd_file_ls(args: FileListArgs) -> int | None: return None for entry in files: uuid_str = entry.get("uuid") - local_path = ( - local_paths.get(uuid_str, "") if isinstance(uuid_str, str) else "" - ) + source = sources.get(uuid_str, "") if isinstance(uuid_str, str) else "" if args.quiet: formatter( uuid=uuid_str, sha256=entry.get("sha256", ""), - local_path=local_path, + source=source, ) continue row: dict[str, object] = {} @@ -350,7 +350,7 @@ def cmd_file_ls(args: FileListArgs) -> int | None: if key in {"created_at", "updated_at"} and isinstance(value, str): value = parse_datetime(value) row[key] = value - row["local_path"] = local_path + row["source"] = source formatter(**row) emitted += len(files) if len(files) < page_size: diff --git a/contree_cli/docker/context.py b/contree_cli/docker/context.py index 8e99cc9..6e5524b 100644 --- a/contree_cli/docker/context.py +++ b/contree_cli/docker/context.py @@ -2,7 +2,6 @@ from __future__ import annotations -import contextlib import hashlib import json import logging @@ -121,7 +120,6 @@ def try_cache_hit(self, branch_name: str) -> str | None: return None self.store.switch_branch(branch_name) self.last_image = tip.image_uuid - logger.info("layer cache hit: %s -> %s", branch_name, tip.image_uuid) return tip.image_uuid def commit_layer( @@ -133,33 +131,31 @@ def commit_layer( title: str, operation_uuid: str = "", ) -> None: - """Materialize a fresh layer branch pointing at ``image_uuid``. + """Materialize a layer branch pointing at ``image_uuid``. 
- Forks from the currently active branch (the parent layer). When the - session is brand-new and has no active branch, the first ``set_image`` - bootstraps the implicit ``main`` branch before we fork. + Works whether the target branch is brand-new, stale from a prior + no-cache rebuild, or even the currently active branch -- in every + case ``set_image_on_branch`` appends a new history entry on it and + moves the branch pointer to the new tip. We then make it active. """ - with contextlib.suppress(ValueError): - self.store.delete_branch(branch_name) - if self.store.session is None: + # Bootstrap: set_image creates the implicit 'main' branch + # before we fork off the named layer. self.store.set_image( image_uuid, kind=kind, title=title, operation_uuid=operation_uuid, ) - self.store.create_branch(branch_name) - self.store.switch_branch(branch_name) - else: - self.store.create_branch(branch_name) - self.store.switch_branch(branch_name) - self.store.set_image( - image_uuid, - kind=kind, - title=title, - operation_uuid=operation_uuid, - ) + + self.store.set_image_on_branch( + branch_name, + image_uuid, + kind=kind, + title=title, + operation_uuid=operation_uuid, + ) + self.store.switch_branch(branch_name) self.last_image = image_uuid self.last_op_uuid = operation_uuid diff --git a/contree_cli/docker/keyword.py b/contree_cli/docker/keyword.py index 7482df4..9aea35b 100644 --- a/contree_cli/docker/keyword.py +++ b/contree_cli/docker/keyword.py @@ -68,9 +68,14 @@ def parse_keyval_pairs(rest: str) -> dict[str, str]: return pairs -@dataclass(frozen=True) +@dataclass(frozen=True, repr=False) class DockerKeyword: - """Base class. Subclasses implement ``parse``, ``serialize``, ``execute``.""" + """Base class. Subclasses implement ``parse``, ``serialize``, ``execute``. + + ``__repr__`` is overridden in every subclass to render the directive as + it would appear in a Dockerfile, so build logs look like the original + source. 
+ """ NAME: ClassVar[str] = "" @@ -84,3 +89,6 @@ def serialize(self) -> str: def execute(self, ctx: BuildContext) -> None: raise NotImplementedError + + def __repr__(self) -> str: + return self.NAME or self.__class__.__name__ diff --git a/contree_cli/docker/kw_add.py b/contree_cli/docker/kw_add.py index d4688ef..b906aef 100644 --- a/contree_cli/docker/kw_add.py +++ b/contree_cli/docker/kw_add.py @@ -1,25 +1,36 @@ """``ADD [--chown=...] [--chmod=...] SRC... DEST`` - file/dir variant of COPY. -URL fetches and tar extraction (the parts of ``ADD`` that distinguish it from -``COPY``) are not supported in the MVP - those inputs emit a warning and are -skipped. +URL sources are streamed straight from the upstream socket into +``POST /v1/files`` (no local disk copy) and cached by URL with their +HTTP validators so the next build reuses the remote ``file_uuid`` +whenever the upstream's ``ETag``/``Last-Modified``/``Content-MD5`` still +matches. Local sources fall through to the same walker that ``COPY`` +uses. Tar auto-extraction is not implemented. """ from __future__ import annotations import json import logging +import posixpath from dataclasses import dataclass, field from typing import ClassVar -from .context import BuildContext +from .context import BuildContext, PendingFile from .keyword import DockerKeyword -from .kw_copy import parse_copy_like, stage_copy +from .kw_copy import ( + format_copy_like, + parse_chmod, + parse_chown, + parse_copy_like, + stage_copy, +) +from .url_fetch import FetchedUrl, fetch_and_upload, is_url, url_basename logger = logging.getLogger(__name__) -@dataclass(frozen=True) +@dataclass(frozen=True, repr=False) class AddKeyword(DockerKeyword): NAME: ClassVar[str] = "ADD" sources: tuple[str, ...] 
= field(default_factory=tuple) @@ -28,6 +39,9 @@ class AddKeyword(DockerKeyword): chmod: str = "" from_stage: str = "" + def __repr__(self) -> str: + return format_copy_like("ADD", self) + @classmethod def parse(cls, args_text: str) -> AddKeyword: return parse_copy_like(cls, args_text, "ADD") @@ -43,18 +57,76 @@ def execute(self, ctx: BuildContext) -> None: logger.warning("ADD --from=%s not supported, skipping", self.from_stage) return - url_sources = [s for s in self.sources if is_url(s)] + sub_dest = ctx.substitute(self.dest) + if not posixpath.isabs(sub_dest): + sub_dest = posixpath.normpath(posixpath.join(ctx.workdir or "/", sub_dest)) + + local_sources: list[str] = [] + url_sources: list[str] = [] + for raw in self.sources: + value = ctx.substitute(raw) + (url_sources if is_url(value) else local_sources).append(value) + if url_sources: - for url in url_sources: - logger.warning("ADD URL %s not supported, skipping", url) - local_sources = tuple(s for s in self.sources if not is_url(s)) - if not local_sources: - return - stage_copy(ctx, local_sources, self.dest, self.chown, self.chmod) - return + for url, fetched in stage_urls( + ctx, + tuple(url_sources), + sub_dest, + chown=ctx.substitute(self.chown), + chmod=ctx.substitute(self.chmod), + multi_source=len(self.sources) > 1, + ): + if fetched.cache_state == "head": + logger.info("CACHED: %r (HEAD validators match): %s", self, url) + elif fetched.cache_state == "get-304": + logger.info("CACHED: %r (GET 304 Not Modified): %s", self, url) - stage_copy(ctx, self.sources, self.dest, self.chown, self.chmod) + if local_sources: + stage_copy( + ctx, + tuple(local_sources), + self.dest, + self.chown, + self.chmod, + ) -def is_url(value: str) -> bool: - return value.startswith(("http://", "https://", "ftp://")) +def stage_urls( + ctx: BuildContext, + urls: tuple[str, ...], + dest: str, + *, + chown: str, + chmod: str, + multi_source: bool, +) -> list[tuple[str, FetchedUrl]]: + """Stream each URL into ``POST /v1/files`` 
and stage a pending file. + + Returns ``[(url, FetchedUrl), ...]`` so the caller can decide how to + log the outcome (``fetched.cache_state`` tells whether the upstream + was downloaded or short-circuited via HEAD/304). + """ + uid, gid = parse_chown(chown) + mode_override = parse_chmod(chmod) + dest_is_dir = dest.endswith("/") or multi_source + + fetches: list[tuple[str, FetchedUrl]] = [] + for url in urls: + fetched = fetch_and_upload(url, ctx.client, ctx.store, timeout=ctx.timeout) + if dest_is_dir: + instance_path = posixpath.join(dest.rstrip("/"), url_basename(url)) + else: + instance_path = dest + mode = mode_override if mode_override is not None else 0o644 + ctx.pending.append( + PendingFile( + instance_path=instance_path, + file_uuid=fetched.file_uuid, + sha256=fetched.sha256, + uid=uid, + gid=gid, + mode=f"{mode:04o}", + ) + ) + fetches.append((url, fetched)) + return fetches diff --git a/contree_cli/docker/kw_arg.py b/contree_cli/docker/kw_arg.py index f8769a7..72e7cd9 100644 --- a/contree_cli/docker/kw_arg.py +++ b/contree_cli/docker/kw_arg.py @@ -9,12 +9,17 @@ from .keyword import DockerKeyword -@dataclass(frozen=True) +@dataclass(frozen=True, repr=False) class ArgKeyword(DockerKeyword): NAME: ClassVar[str] = "ARG" name: str = "" default: str | None = None + def __repr__(self) -> str: + if self.default is None: + return f"ARG {self.name}" + return f"ARG {self.name}={self.default}" + @classmethod def parse(cls, args_text: str) -> ArgKeyword: raw = args_text.strip() diff --git a/contree_cli/docker/kw_copy.py b/contree_cli/docker/kw_copy.py index 65df59d..f3c182c 100644 --- a/contree_cli/docker/kw_copy.py +++ b/contree_cli/docker/kw_copy.py @@ -19,7 +19,7 @@ T = TypeVar("T", bound=DockerKeyword) -@dataclass(frozen=True) +@dataclass(frozen=True, repr=False) class CopyKeyword(DockerKeyword): NAME: ClassVar[str] = "COPY" sources: tuple[str, ...] 
= field(default_factory=tuple) @@ -28,6 +28,9 @@ class CopyKeyword(DockerKeyword): chmod: str = "" from_stage: str = "" + def __repr__(self) -> str: + return format_copy_like("COPY", self) + @classmethod def parse(cls, args_text: str) -> CopyKeyword: return parse_copy_like(cls, args_text, "COPY") @@ -157,3 +160,19 @@ def resolve_id(value: str) -> int: return int(value) except ValueError: return 0 + + +def format_copy_like(name: str, kw: object) -> str: + flags: list[str] = [] + chown = getattr(kw, "chown", "") + chmod = getattr(kw, "chmod", "") + from_stage = getattr(kw, "from_stage", "") + if from_stage: + flags.append(f"--from={from_stage}") + if chown: + flags.append(f"--chown={chown}") + if chmod: + flags.append(f"--chmod={chmod}") + sources = list(getattr(kw, "sources", ())) + dest = getattr(kw, "dest", "") + return " ".join([name, *flags, *sources, dest]) diff --git a/contree_cli/docker/kw_env.py b/contree_cli/docker/kw_env.py index 11253db..e2b5412 100644 --- a/contree_cli/docker/kw_env.py +++ b/contree_cli/docker/kw_env.py @@ -9,11 +9,14 @@ from .keyword import DockerKeyword, parse_keyval_pairs -@dataclass(frozen=True) +@dataclass(frozen=True, repr=False) class EnvKeyword(DockerKeyword): NAME: ClassVar[str] = "ENV" pairs: tuple[tuple[str, str], ...] 
= field(default_factory=tuple) + def __repr__(self) -> str: + return "ENV " + " ".join(f"{k}={v}" for k, v in self.pairs) + @classmethod def parse(cls, args_text: str) -> EnvKeyword: raw = args_text.strip() diff --git a/contree_cli/docker/kw_from.py b/contree_cli/docker/kw_from.py index 7c00789..ba52cc8 100644 --- a/contree_cli/docker/kw_from.py +++ b/contree_cli/docker/kw_from.py @@ -21,12 +21,17 @@ TERMINAL_STATUSES = frozenset({"SUCCESS", "FAILED", "CANCELLED"}) -@dataclass(frozen=True) +@dataclass(frozen=True, repr=False) class FromKeyword(DockerKeyword): NAME: ClassVar[str] = "FROM" image_ref: str = "" alias: str = "" + def __repr__(self) -> str: + if self.alias: + return f"FROM {self.image_ref} AS {self.alias}" + return f"FROM {self.image_ref}" + @classmethod def parse(cls, args_text: str) -> FromKeyword: raw = args_text.strip() @@ -52,6 +57,7 @@ def execute(self, ctx: BuildContext) -> None: ctx.pending.clear() cached = ctx.try_cache_hit(branch_name) if cached is not None: + logger.info("CACHED: %r -> %s", self, cached) ctx.parent_hash = from_hash return diff --git a/contree_cli/docker/kw_run.py b/contree_cli/docker/kw_run.py index dac33f4..8bc72fe 100644 --- a/contree_cli/docker/kw_run.py +++ b/contree_cli/docker/kw_run.py @@ -19,12 +19,17 @@ TERMINAL_STATUSES = frozenset({"SUCCESS", "FAILED", "CANCELLED"}) -@dataclass(frozen=True) +@dataclass(frozen=True, repr=False) class RunKeyword(DockerKeyword): NAME: ClassVar[str] = "RUN" parts: tuple[str, ...] 
= field(default_factory=tuple) shell_form: bool = True + def __repr__(self) -> str: + if self.shell_form: + return f"RUN {self.parts[0] if self.parts else ''}" + return f"RUN {json.dumps(list(self.parts))}" + @classmethod def parse(cls, args_text: str) -> RunKeyword: raw = args_text.strip() @@ -48,6 +53,7 @@ def execute(self, ctx: BuildContext) -> None: cached = ctx.try_cache_hit(branch_name) if cached is not None: + logger.info("CACHED: %r -> %s", self, cached) ctx.parent_hash = chain ctx.pending.clear() return diff --git a/contree_cli/docker/kw_skipped.py b/contree_cli/docker/kw_skipped.py index 7a3484f..a8b9cc2 100644 --- a/contree_cli/docker/kw_skipped.py +++ b/contree_cli/docker/kw_skipped.py @@ -11,11 +11,14 @@ logger = logging.getLogger(__name__) -@dataclass(frozen=True) +@dataclass(frozen=True, repr=False) class SkippedKeyword(DockerKeyword): name: str = "" raw: str = "" + def __repr__(self) -> str: + return f"{self.name} {self.raw}".rstrip() + @classmethod def of(cls, name: str, raw: str) -> SkippedKeyword: return cls(name=name.upper(), raw=raw) diff --git a/contree_cli/docker/kw_user.py b/contree_cli/docker/kw_user.py index 1fa3f9a..c429ef7 100644 --- a/contree_cli/docker/kw_user.py +++ b/contree_cli/docker/kw_user.py @@ -9,11 +9,14 @@ from .keyword import DockerKeyword -@dataclass(frozen=True) +@dataclass(frozen=True, repr=False) class UserKeyword(DockerKeyword): NAME: ClassVar[str] = "USER" spec: str = "" + def __repr__(self) -> str: + return f"USER {self.spec}" + @classmethod def parse(cls, args_text: str) -> UserKeyword: raw = args_text.strip() diff --git a/contree_cli/docker/kw_workdir.py b/contree_cli/docker/kw_workdir.py index f219642..65bb976 100644 --- a/contree_cli/docker/kw_workdir.py +++ b/contree_cli/docker/kw_workdir.py @@ -10,11 +10,14 @@ from .keyword import DockerKeyword -@dataclass(frozen=True) +@dataclass(frozen=True, repr=False) class WorkdirKeyword(DockerKeyword): NAME: ClassVar[str] = "WORKDIR" path: str = "" + def __repr__(self) -> 
str: + return f"WORKDIR {self.path}" + @classmethod def parse(cls, args_text: str) -> WorkdirKeyword: raw = args_text.strip() diff --git a/contree_cli/docker/url_fetch.py b/contree_cli/docker/url_fetch.py new file mode 100644 index 0000000..5dda18c --- /dev/null +++ b/contree_cli/docker/url_fetch.py @@ -0,0 +1,303 @@ +"""Fetch a URL straight into ``POST /v1/files`` for ``ADD`` directives. + +The body is streamed from the source socket to the contree API socket +without ever touching local disk. Before the download starts we issue a +``HEAD`` against the source URL: if our cached ``etag`` / +``last-modified`` / ``content-md5`` still match the upstream headers we +return the cached remote ``file_uuid`` without re-downloading or +re-uploading. When the upstream has no usable validators we issue the +``GET``, hash the body as it flies through, and persist whatever +validators the server did return so the next build can short-circuit. + +HTTP transport is ``urllib.request`` which transparently follows +redirects and handles HTTPS. +""" + +from __future__ import annotations + +import hashlib +import json +import logging +import time +import urllib.error +import urllib.parse +import urllib.request +from dataclasses import dataclass +from typing import IO, cast + +from contree_cli.client import ApiError, ContreeClient +from contree_cli.session import SessionStore + +logger = logging.getLogger(__name__) + +DOWNLOAD_TIMEOUT_DEFAULT = 300 +USER_AGENT = "contree-cli url-fetch" + + +@dataclass(frozen=True) +class FetchedUrl: + url: str + file_uuid: str + sha256: str + size: int + cache_state: str = "fetched" # "head", "get-304", or "fetched" + + +def fetch_and_upload( + url: str, + client: ContreeClient, + store: SessionStore, + *, + timeout: int = DOWNLOAD_TIMEOUT_DEFAULT, +) -> FetchedUrl: + """Resolve ``url`` to a remote ``file_uuid``, skipping work whenever + cached validators match the upstream. 
+ + The returned ``cache_state`` tells the caller how the result was + obtained (``"head"``, ``"get-304"``, ``"fetched"``); use it to emit + a ``CACHED:`` log line at the call site. + """ + cache_key = url_cache_key(url) + meta = read_metadata(store, cache_key) + + head_headers = http_head(url, timeout=timeout) + if meta and head_headers and validators_match(meta, head_headers): + bump_fetched_at(store, cache_key, meta) + return cached_result(url, meta, cache_state="head") + + cond = conditional_headers(meta) if meta else {} + status, response_headers, source = http_get_stream( + url, headers=cond, timeout=timeout + ) + if status == 304: + if not meta: + raise RuntimeError( + f"server returned 304 for {url!r} but no cached metadata exists" + ) + bump_fetched_at(store, cache_key, meta) + return cached_result(url, meta, cache_state="get-304") + + assert source is not None + try: + reader = HashingReader(source) + upload_headers: dict[str, str] = {"Content-Type": "application/octet-stream"} + content_length = parse_content_length(response_headers) + if content_length > 0: + upload_headers["Content-Length"] = str(content_length) + + resp = client.request( + "POST", + "/v1/files", + body=cast(IO[bytes], reader), + headers=upload_headers, + ) + data = json.loads(resp.read()) + finally: + close = getattr(source, "close", None) + if callable(close): + close() + + file_uuid = str(data["uuid"]) + sha = reader.hasher.hexdigest() + size = reader.bytes_read + + write_metadata( + store, + cache_key, + { + "uuid": file_uuid, + "url": url, + "sha256": sha, + "size": size, + "etag": response_headers.get("etag", ""), + "last_modified": response_headers.get("last-modified", ""), + "content_md5": response_headers.get("content-md5", ""), + "fetched_at": time.time(), + }, + ) + logger.info( + "URL piped %s -> %s (%d bytes, sha %s)", + url, + file_uuid, + size, + sha[:12], + ) + return FetchedUrl(url=url, file_uuid=file_uuid, sha256=sha, size=size) + + +def url_cache_key(url: str) -> 
str: + """The URL is its own identity in the local_file cache.""" + return f"local_file:{url}" + + +def read_metadata(store: SessionStore, cache_key: str) -> dict[str, object] | None: + value = store.cache.get(("", cache_key)) + if isinstance(value, dict): + return value + return None + + +def write_metadata( + store: SessionStore, + cache_key: str, + meta: dict[str, object], +) -> None: + store.cache[("", cache_key)] = meta + + +def bump_fetched_at( + store: SessionStore, + cache_key: str, + meta: dict[str, object], +) -> None: + refreshed = dict(meta) + refreshed["fetched_at"] = time.time() + write_metadata(store, cache_key, refreshed) + + +def cached_result(url: str, meta: dict[str, object], *, cache_state: str) -> FetchedUrl: + size_raw = meta.get("size", -1) + size = size_raw if isinstance(size_raw, int) else -1 + return FetchedUrl( + url=url, + file_uuid=str(meta["uuid"]), + sha256=str(meta["sha256"]), + size=size, + cache_state=cache_state, + ) + + +def validators_match(meta: dict[str, object], headers: dict[str, str]) -> bool: + """Return True if any cached validator still matches the upstream headers.""" + etag_cached = meta.get("etag") + etag_upstream = headers.get("etag") + if isinstance(etag_cached, str) and etag_cached and etag_cached == etag_upstream: + return True + lm_cached = meta.get("last_modified") + lm_upstream = headers.get("last-modified") + if isinstance(lm_cached, str) and lm_cached and lm_cached == lm_upstream: + return True + md5_cached = meta.get("content_md5") + md5_upstream = headers.get("content-md5") + return bool( + isinstance(md5_cached, str) and md5_cached and md5_cached == md5_upstream + ) + + +def conditional_headers(meta: dict[str, object]) -> dict[str, str]: + headers: dict[str, str] = {} + etag = meta.get("etag") + if isinstance(etag, str) and etag: + headers["If-None-Match"] = etag + last_modified = meta.get("last_modified") + if isinstance(last_modified, str) and last_modified: + headers["If-Modified-Since"] = last_modified 
+ return headers + + +def parse_content_length(headers: dict[str, str]) -> int: + raw = headers.get("content-length") + if not raw: + return -1 + try: + return int(raw) + except ValueError: + return -1 + + +class HashingReader: + """Read-only adapter that hashes bytes as they flow through. + + Intentionally has no ``seek`` attribute so ``ContreeClient.request`` + does not attempt to rewind the upstream HTTP body on retry. + """ + + __slots__ = ("bytes_read", "hasher", "source") + + def __init__(self, source: object) -> None: + self.source = source + self.hasher = hashlib.sha256() + self.bytes_read = 0 + + def read(self, amt: int | None = None) -> bytes: + chunk = self.source.read() if amt is None else self.source.read(amt) # type: ignore[attr-defined] + if chunk: + self.hasher.update(chunk) + self.bytes_read += len(chunk) + return chunk # type: ignore[no-any-return] + + +def http_head(url: str, *, timeout: int = DOWNLOAD_TIMEOUT_DEFAULT) -> dict[str, str]: + """Best-effort ``HEAD`` request. Returns empty headers on any failure. + + ``urllib.request`` transparently follows 3xx redirects, so the returned + headers always describe the final resource. + """ + req = urllib.request.Request( + url, + method="HEAD", + headers={"User-Agent": USER_AGENT, "Accept": "*/*"}, + ) + try: + with urllib.request.urlopen(req, timeout=timeout) as resp: + return {k.lower(): v for k, v in resp.headers.items()} + except urllib.error.HTTPError as exc: + logger.debug("HEAD %s returned %d, skipping validator probe", url, exc.code) + return {} + except (urllib.error.URLError, OSError, ValueError) as exc: + logger.debug("HEAD %s failed (%s), skipping validator probe", url, exc) + return {} + + +def http_get_stream( + url: str, + *, + headers: dict[str, str], + timeout: int = DOWNLOAD_TIMEOUT_DEFAULT, +) -> tuple[int, dict[str, str], object | None]: + """Issue a GET; return ``(status, headers, body_stream | None)``. 
+ + A ``304 Not Modified`` response yields ``(304, headers, None)`` and + has already been drained. For any other non-2xx the function raises + ``RuntimeError``. The caller owns the returned stream and must close it. + Redirects are handled by ``urllib.request``. + """ + req = urllib.request.Request( + url, + method="GET", + headers={"User-Agent": USER_AGENT, "Accept": "*/*", **headers}, + ) + try: + resp = urllib.request.urlopen(req, timeout=timeout) + except urllib.error.HTTPError as exc: + response_headers = {k.lower(): v for k, v in exc.headers.items()} + if exc.code == 304: + exc.close() + return 304, response_headers, None + body = exc.read(2048).decode("utf-8", errors="replace") + exc.close() + raise RuntimeError( + f"GET {url!r} returned {exc.code} {exc.reason}: {body!r}" + ) from exc + + response_headers = {k.lower(): v for k, v in resp.headers.items()} + return resp.status, response_headers, resp + + +def is_url(value: str) -> bool: + return value.startswith(("http://", "https://")) + + +def url_basename(url: str, fallback: str = "downloaded") -> str: + parsed = urllib.parse.urlsplit(url) + name = parsed.path.rsplit("/", 1)[-1] + return name or fallback + + +__all__ = [ + "ApiError", + "FetchedUrl", + "fetch_and_upload", + "is_url", + "url_basename", +] diff --git a/contree_cli/session.py b/contree_cli/session.py index 1e605e4..e052bda 100644 --- a/contree_cli/session.py +++ b/contree_cli/session.py @@ -218,12 +218,13 @@ def __len__(self) -> int: return row[0] # type: ignore[no-any-return] def local_file_paths(self) -> dict[str, str]: - """Map remote file UUID to the host path that uploaded it. + """Map remote file UUID to whatever this machine uploaded it from. - Reads every ``local_file:*`` cache entry, decodes its JSON - payload, and returns ``{remote_uuid: local_path}`` for entries - that have both fields. Older entries without ``local_path`` - are silently skipped. + Reads every ``local_file:*`` cache entry. 
The value is either an + absolute host path (for files uploaded by ``run --file`` / + ``COPY``) or a ``https://...`` URL (for ``ADD URL``); both come + out of the same cache namespace so callers get a single mapping + regardless of upload source. """ cur = self._conn.execute( "SELECT value FROM image_cache WHERE kind LIKE 'local_file:%'", @@ -234,9 +235,11 @@ def local_file_paths(self) -> dict[str, str]: if not isinstance(value, dict): continue uuid_str = value.get("uuid") - local_path = value.get("local_path") - if isinstance(uuid_str, str) and isinstance(local_path, str): - result[uuid_str] = local_path + if not isinstance(uuid_str, str): + continue + origin = value.get("local_path") or value.get("url") + if isinstance(origin, str) and origin: + result[uuid_str] = origin return result def invalidate_prefix( diff --git a/contree_cli/skill_body.md b/contree_cli/skill_body.md index 0f567bc..2193657 100644 --- a/contree_cli/skill_body.md +++ b/contree_cli/skill_body.md @@ -167,17 +167,18 @@ Unsure about sessions? Run `contree session --help` or `contree agent sessions` - `cp`: download a file from the image to the host. - `file edit`: open a remote file in a host editor and stage it for the next run. - `file cp`: upload a local file and stage it for the next run. -- `file ls`: list uploaded files; rows uploaded from this host show their - `local_path` so the agent can map a UUID back to a workspace file. Add - `-q` for a tight `uuid sha256 local_path` view. - - **`local_path` is THIS-MACHINE ONLY.** The mapping lives in the local - CLI SQLite cache (`$CONTREE_HOME/cli/sessions/.db`) keyed by - `path + inode + mtime + size`. It is not synced anywhere. Rows uploaded - from a different machine, by another teammate, or before path tracking - landed will show an empty `local_path` — that is expected, not a bug. +- `file ls`: list uploaded files; rows produced from this host carry a + `source` field (host path for `run --file` / `COPY`, URL for + `ADD URL`). 
Add `-q` for a tight `uuid sha256 source` view. + + **`source` is THIS-MACHINE ONLY.** The mapping lives in the local + CLI SQLite cache (`$CONTREE_HOME/cli/sessions/.db`) keyed + by `path + inode + mtime + size` for host paths and by the URL + itself for URL fetches. It is not synced anywhere. Rows uploaded + from a different machine, by another teammate, or before tracking + landed will show an empty `source` -- that is expected, not a bug. When working across hosts, treat the remote `uuid`/`sha256` as the - authoritative identifier and never rely on `local_path` resolving. + authoritative identifier and never rely on `source` resolving. - `session branch`: create an experimental branch. - `session checkout`: switch active branch. - `session rollback`: move the active branch pointer backward. diff --git a/docs/commands/build.md b/docs/commands/build.md index 754c155..8c4b85a 100644 --- a/docs/commands/build.md +++ b/docs/commands/build.md @@ -49,7 +49,7 @@ contree build . --no-cache --tag myapp:dev | `FROM ref[:tag] [AS name]` | Resolves the base image. If the tag is not found locally, the build auto-imports it via `POST /v1/images/import`. `AS name` is parsed but ignored (multi-stage is Phase 2). | | `RUN ...` | Shell-form (`RUN echo hi`) or JSON exec-form (`RUN ["echo","hi"]`). Spawns `POST /v1/instances`, polls until terminal status, captures the resulting image. | | `COPY [--chown=...] [--chmod=...] SRC... DEST` | Walks local sources relative to the build context, applies `.dockerignore`, uploads files (with SHA256 dedup), and stages them for the next `RUN`. | -| `ADD ...` | Same as `COPY` for local files; URL/tar inputs emit a warning and are skipped. | +| `ADD ...` | Local paths behave like `COPY`. `https://`/`http://` URLs are **streamed straight from the source socket into `POST /v1/files`** (no temp file on disk); the URL plus its `ETag`/`Last-Modified`/`Content-MD5` validators are cached so repeat builds skip the download via a `HEAD` validator probe (or a conditional `GET` answered with `304 Not Modified`).
Tar auto-extraction is not implemented. | | `WORKDIR /path` | Sets the working directory for subsequent directives. | | `ENV KEY=VALUE ...` | Accumulates environment variables passed to every `RUN`. | | `ARG NAME[=DEFAULT]` | Declares a build-time variable. Overridden by `--build-arg`. | @@ -133,11 +133,20 @@ WORKDIR /app COPY hello.py /app/hello.py COPY src /app/src +ADD https://github.com/nebius/contree-cli/archive/refs/heads/master.zip /tmp/contree-cli.zip RUN python -c "import sys; print('python', sys.version)" +RUN python -m zipfile -e /tmp/contree-cli.zip /opt/ +RUN pip install --no-cache-dir /opt/contree-cli-master +RUN contree --help | head -20 RUN python /app/hello.py ``` +The `ADD` line streams the zip straight from GitHub into the contree +API (no local temp file). The subsequent `RUN` steps unpack it, +`pip install` the project, and prove the installed `contree` binary +works inside the built image. + ```dockerfile % docs/examples/build-demo/.dockerignore **/*.log @@ -170,8 +179,10 @@ IMAGE TAG SESSION contree-cli-build-demo:latest build: ``` -Re-running the same command without `--no-cache` produces three layer -cache hits and no API instance spawns. +Re-running the same command without `--no-cache` produces layer cache +hits, and the `ADD URL` step short-circuits at the `HEAD` probe (look +for `CACHED: ADD ... (HEAD validators match)` in the log) -- no body +download, no upload. ## See also diff --git a/docs/commands/file.md b/docs/commands/file.md index c24faa2..1bc8e55 100644 --- a/docs/commands/file.md +++ b/docs/commands/file.md @@ -38,34 +38,38 @@ file is uploaded immediately but only applied to the sandbox on the next ### `file ls` Lists files uploaded to the project (`GET /v1/files`) and joins each row -with the local upload cache so that the host path that produced the file -is shown under `LOCAL_PATH` when known. +with the local upload cache.
The `SOURCE` column shows whatever this +machine produced the file from: + +- absolute host path for files uploaded via `run --file` or `COPY`; +- `https://...` URL for files fetched via `ADD URL`. :::{important} -`local_path` resolves **only for files uploaded from this very machine**. +`SOURCE` resolves **only for files uploaded from this very machine**. The mapping lives in the local SQLite cache (per-profile, under -`$CONTREE_HOME/cli/sessions/<profile>.db`) and is keyed by -`path + inode + mtime + size`. It is **not** synced anywhere, so a row -will show an empty `LOCAL_PATH` whenever: +`$CONTREE_HOME/cli/sessions/<profile>.db`) keyed by +`path + inode + mtime + size` (host paths) or by the URL itself (URL +fetches). It is **not** synced anywhere, so a row will show an empty +`SOURCE` whenever: - the file was uploaded by a different machine, container, or teammate; - the file was uploaded by an earlier CLI version that did not yet - store the host path (those entries backfill the next time the file - is matched by the local cache); -- the local file has been moved, renamed, or its inode/mtime/size has + track its origin (those entries backfill the next time the file is + matched by the local cache); +- the host file has been moved, renamed, or its `inode/mtime/size` has changed since upload (the cache key no longer matches and the mapping is treated as missing until the next upload). -There is no way to recover the host path of a file that was uploaded -from another machine — the server stores only `uuid`, `sha256`, `size`, +There is no way to recover the source of a file uploaded from another +machine -- the server stores only `uuid`, `sha256`, `size`, `created_at`, and `updated_at`.
::: ```bash contree file ls contree file ls --since 1d --limit 200 -contree file ls -q # uuid + sha256 + local_path only -contree -f json file ls | jq 'select(.local_path != "")' +contree file ls -q # uuid + sha256 + source only +contree -f json file ls | jq 'select(.source != "")' ``` ## Pending files diff --git a/docs/examples/build-demo/Dockerfile b/docs/examples/build-demo/Dockerfile index 43d85e6..c1ede24 100644 --- a/docs/examples/build-demo/Dockerfile +++ b/docs/examples/build-demo/Dockerfile @@ -7,6 +7,10 @@ WORKDIR /app COPY hello.py /app/hello.py COPY src /app/src +ADD https://github.com/nebius/contree-cli/archive/refs/heads/master.zip /tmp/contree-cli.zip RUN python -c "import sys; print('python', sys.version)" +RUN python -m zipfile -e /tmp/contree-cli.zip /opt/ +RUN pip install --no-cache-dir /opt/contree-cli-master +RUN contree --help | head -20 RUN python /app/hello.py diff --git a/tests/conftest.py b/tests/conftest.py index 7555d29..052b871 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -88,10 +88,19 @@ def request( self, method: str, path: str, - body: bytes | None = None, + body: object = None, headers: dict[str, str] | None = None, ) -> None: - self.requests.append(RecordedRequest(method, path, body, headers or {})) + if hasattr(body, "read") and not isinstance(body, (bytes, bytearray)): + chunks: list[bytes] = [] + while True: + chunk = body.read(64 * 1024) + if not chunk: + break + chunks.append(chunk) + body = b"".join(chunks) + recorded = body if isinstance(body, (bytes, bytearray, type(None))) else None + self.requests.append(RecordedRequest(method, path, recorded, headers or {})) def getresponse(self) -> FakeResponse: return self.responses.pop(0) diff --git a/tests/test_build.py b/tests/test_build.py index 80f8a12..24ab018 100644 --- a/tests/test_build.py +++ b/tests/test_build.py @@ -229,6 +229,60 @@ def test_no_cache_reruns(self, context_dir, db_path): assert rc is None assert second.request_count == 3 + def 
test_no_cache_when_from_layer_is_active_branch(self, context_dir, db_path): + """Regression: --no-cache must not blow up when the target layer + branch is currently the active one. + + Reproduces the original failure where ``commit_layer`` tried to + delete an active branch and then re-create it, producing + ``Branch '...' already exists``. With ``set_image_on_branch`` we + update the pointer in place without touching the active flag. + """ + write_dockerfile(context_dir, "FROM tag:ubuntu:latest\nRUN echo hi\n") + + first = ContreeTestClient() + run_build( + first, + BuildArgs(context=str(context_dir)), + [ + make_tag_lookup(BASE_IMG), + make_spawn(), + make_op_success(NEW_IMG), + ], + db_path, + ) + + # Force the active branch back to the FROM layer (simulates a user + # doing `session checkout layer:` between builds, or a + # prior build that ended on FROM only). + import hashlib + + from contree_cli.cli.build import make_session_key + + session_key = make_session_key(context_dir.resolve()) + from_hash = hashlib.sha256(f"FROM:{BASE_IMG}".encode()).hexdigest() + from_branch = f"layer:{from_hash[:16]}" + store = SessionStore(db_path, session_key) + try: + store.switch_branch(from_branch) + finally: + store.close() + + # Rebuild with --no-cache: this previously failed with + # "Branch 'layer:...' already exists". 
+ second = ContreeTestClient() + rc = run_build( + second, + BuildArgs(context=str(context_dir), no_cache=True), + [ + make_tag_lookup(BASE_IMG), + make_spawn("op-2"), + make_op_success(NEW_IMG_2, "op-2"), + ], + db_path, + ) + assert rc is None + class TestCopy: def test_copy_pending_attaches_to_next_run(self, context_dir, db_path): diff --git a/tests/test_client.py b/tests/test_client.py index 54f0041..8590fec 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -318,7 +318,7 @@ def test_passes_file_object_to_connection(self): headers={"Content-Type": "application/octet-stream"}, ) sent = c.get_request(-1).body - assert sent is stream + assert sent == b"a" * 1024 def test_retry_seeks_back_to_start(self): import io diff --git a/tests/test_file_cmd.py b/tests/test_file_cmd.py index 1c3fd10..2e7af2e 100644 --- a/tests/test_file_cmd.py +++ b/tests/test_file_cmd.py @@ -114,17 +114,22 @@ def _run_file_ls( class TestFileLs: - def test_lists_with_local_path(self, contree_client, session_store, capsys): + def test_lists_with_source(self, contree_client, session_store, capsys): session_store.cache[("", "local_file:a")] = { "uuid": "file-1", "local_path": "/host/app.py", } + session_store.cache[("", "local_file:https://example.com/pkg.tgz")] = { + "uuid": "file-3", + "url": "https://example.com/pkg.tgz", + } responses = [ _api_response( { "files": [ {"uuid": "file-1", "sha256": "abc", "size": 10}, {"uuid": "file-2", "sha256": "def", "size": 20}, + {"uuid": "file-3", "sha256": "ghi", "size": 30}, ] } ), @@ -136,12 +141,10 @@ def test_lists_with_local_path(self, contree_client, session_store, capsys): store=session_store, ) assert rc is None - out = capsys.readouterr().out.splitlines() - rows = [json.loads(line) for line in out] - assert rows[0]["uuid"] == "file-1" - assert rows[0]["local_path"] == "/host/app.py" - assert rows[1]["uuid"] == "file-2" - assert rows[1]["local_path"] == "" + rows = [json.loads(line) for line in capsys.readouterr().out.splitlines()] + 
assert rows[0]["source"] == "/host/app.py" + assert rows[1]["source"] == "" + assert rows[2]["source"] == "https://example.com/pkg.tgz" def test_quiet_emits_three_columns(self, contree_client, session_store, capsys): session_store.cache[("", "local_file:a")] = { @@ -168,12 +171,9 @@ def test_quiet_emits_three_columns(self, contree_client, session_store, capsys): responses, store=session_store, ) - out = capsys.readouterr().out.strip() - row = json.loads(out) - assert set(row) == {"uuid", "sha256", "local_path"} - assert row["uuid"] == "file-1" - assert row["sha256"] == "abc" - assert row["local_path"] == "/host/app.py" + row = json.loads(capsys.readouterr().out.strip()) + assert set(row) == {"uuid", "sha256", "source"} + assert row["source"] == "/host/app.py" class TestFileSha256: diff --git a/tests/test_session.py b/tests/test_session.py index 64ffed5..ce26899 100644 --- a/tests/test_session.py +++ b/tests/test_session.py @@ -779,14 +779,29 @@ def test_local_file_paths_returns_uuid_to_path(self, session_store: SessionStore result = cache.local_file_paths() assert result == {"u-1": "/host/a.txt", "u-2": "/host/b.txt"} - def test_local_file_paths_skips_entries_without_local_path( + def test_local_file_paths_skips_entries_without_source( self, session_store: SessionStore ): cache = session_store.cache - cache[("", "local_file:old")] = {"uuid": "u-old"} # no local_path + cache[("", "local_file:old")] = {"uuid": "u-old"} cache[("", "local_file:new")] = {"uuid": "u-new", "local_path": "/host/x"} assert cache.local_file_paths() == {"u-new": "/host/x"} + def test_local_file_paths_returns_url_sources(self, session_store: SessionStore): + cache = session_store.cache + cache[("", "local_file:https://example.com/pkg.tgz")] = { + "uuid": "u-url", + "url": "https://example.com/pkg.tgz", + } + cache[("", "local_file:/home/u/x.txt")] = { + "uuid": "u-local", + "local_path": "/home/u/x.txt", + } + assert cache.local_file_paths() == { + "u-url": "https://example.com/pkg.tgz", + 
"u-local": "/home/u/x.txt", + } + def test_global_image_list(self, session_store: SessionStore): """The image list cache uses empty-string UUID.""" cache = session_store.cache diff --git a/tests/test_url_fetch.py b/tests/test_url_fetch.py new file mode 100644 index 0000000..d72d96b --- /dev/null +++ b/tests/test_url_fetch.py @@ -0,0 +1,205 @@ +from __future__ import annotations + +import io +import urllib.error +from unittest.mock import patch + +import pytest + +import contree_cli.docker.url_fetch as url_fetch +from contree_cli.docker.url_fetch import ( + HashingReader, + fetch_and_upload, + is_url, + url_basename, + url_cache_key, + validators_match, +) + + +class FakeStream: + """Minimal http.client.HTTPResponse-like object.""" + + def __init__(self, body: bytes, headers: dict[str, str] | None = None): + self._buf = io.BytesIO(body) + self.headers = headers or {} + self.status = 200 + self.closed = False + + def read(self, amt: int | None = None) -> bytes: + return self._buf.read(amt) if amt is not None else self._buf.read() + + def close(self) -> None: + self.closed = True + + +class TestSmallHelpers: + def test_is_url(self): + assert is_url("https://x") + assert is_url("http://x") + assert not is_url("./x") + assert not is_url("ftp://x") + + def test_url_basename(self): + assert url_basename("https://example.com/foo/bar.tar.gz") == "bar.tar.gz" + assert url_basename("https://example.com/") == "downloaded" + + def test_url_cache_key_uses_url_as_identity(self): + assert url_cache_key("https://x/y") == "local_file:https://x/y" + + def test_validators_match_etag(self): + meta = {"etag": '"abc"', "last_modified": "", "content_md5": ""} + assert validators_match(meta, {"etag": '"abc"'}) is True + assert validators_match(meta, {"etag": '"def"'}) is False + + def test_validators_match_last_modified(self): + meta = {"etag": "", "last_modified": "Mon, 01 Jan 2024 00:00:00 GMT"} + assert validators_match( + meta, {"last-modified": "Mon, 01 Jan 2024 00:00:00 GMT"} + ) + + 
def test_validators_match_returns_false_when_no_validators(self): + assert validators_match({}, {}) is False + + +class TestHashingReader: + def test_hashes_and_counts(self): + src = FakeStream(b"hello world") + r = HashingReader(src) + chunks = [r.read(5), r.read(5), r.read(5)] + assert b"".join(chunks) == b"hello world" + assert r.bytes_read == 11 + import hashlib + + assert r.hasher.hexdigest() == hashlib.sha256(b"hello world").hexdigest() + + def test_has_no_seek_attribute(self): + """ContreeClient.request relies on absent .seek to skip retry-rewind.""" + r = HashingReader(FakeStream(b"x")) + assert not hasattr(r, "seek") + + +class TestFetchAndUpload: + URL = "https://example.com/pkg.tgz" + + def test_first_fetch_uploads_and_caches(self, contree_client, session_store): + body = b"abcdef" + head_headers = {"etag": '"first"'} + get_headers = {"etag": '"first"', "content-length": str(len(body))} + + contree_client.respond_json({"uuid": "remote-1", "sha256": "x"}) + + with ( + patch.object(url_fetch, "http_head", return_value=head_headers), + patch.object( + url_fetch, + "http_get_stream", + return_value=(200, get_headers, FakeStream(body)), + ), + ): + result = fetch_and_upload(self.URL, contree_client, session_store) + + assert result.file_uuid == "remote-1" + assert result.size == len(body) + + cached = session_store.cache.get(("", url_cache_key(self.URL))) + assert isinstance(cached, dict) + assert cached["uuid"] == "remote-1" + assert cached["url"] == self.URL + assert cached["etag"] == '"first"' + + upload_req = contree_client.get_request(0) + assert upload_req.method == "POST" + assert "/v1/files" in upload_req.path + + def test_head_validators_skip_download(self, contree_client, session_store): + cache_key = url_cache_key(self.URL) + session_store.cache[("", cache_key)] = { + "uuid": "remote-cached", + "sha256": "cached-sha", + "url": self.URL, + "etag": '"v1"', + "size": 42, + } + with ( + patch.object(url_fetch, "http_head", return_value={"etag": 
'"v1"'}), + patch.object(url_fetch, "http_get_stream") as get_mock, + ): + result = fetch_and_upload(self.URL, contree_client, session_store) + + assert result.file_uuid == "remote-cached" + assert result.sha256 == "cached-sha" + get_mock.assert_not_called() + assert contree_client.request_count == 0 + + def test_get_304_skips_upload(self, contree_client, session_store): + cache_key = url_cache_key(self.URL) + session_store.cache[("", cache_key)] = { + "uuid": "remote-cached", + "sha256": "cached-sha", + "url": self.URL, + "etag": '"v1"', + "size": 42, + } + # HEAD reports no usable validators (e.g. stripped by intermediary). + with ( + patch.object(url_fetch, "http_head", return_value={}), + patch.object( + url_fetch, + "http_get_stream", + return_value=(304, {"etag": '"v1"'}, None), + ), + ): + result = fetch_and_upload(self.URL, contree_client, session_store) + + assert result.file_uuid == "remote-cached" + assert contree_client.request_count == 0 + + +class TestHttpHelpers: + def test_http_head_returns_empty_on_http_error(self): + err = urllib.error.HTTPError( + url="https://x", + code=405, + msg="Method Not Allowed", + hdrs=None, # type: ignore[arg-type] + fp=io.BytesIO(b""), + ) + with patch("urllib.request.urlopen", side_effect=err): + assert url_fetch.http_head("https://x") == {} + + def test_http_get_stream_translates_304_to_status_only(self): + class FakeHeaders: + def items(self): + return [("ETag", '"v1"')] + + err = urllib.error.HTTPError( + url="https://x", + code=304, + msg="Not Modified", + hdrs=FakeHeaders(), # type: ignore[arg-type] + fp=io.BytesIO(b""), + ) + with patch("urllib.request.urlopen", side_effect=err): + status, headers, source = url_fetch.http_get_stream("https://x", headers={}) + assert status == 304 + assert headers == {"etag": '"v1"'} + assert source is None + + def test_http_get_stream_raises_on_non_2xx(self): + class FakeHeaders: + def items(self): + return [] + + err = urllib.error.HTTPError( + url="https://x", + code=500, + 
msg="Internal Server Error", + hdrs=FakeHeaders(), # type: ignore[arg-type] + fp=io.BytesIO(b"oops"), + ) + with ( + patch("urllib.request.urlopen", side_effect=err), + pytest.raises(RuntimeError, match="500"), + ): + url_fetch.http_get_stream("https://x", headers={}) From 8a76f18fea666461c838b5c7b10fc4af628e4fd8 Mon Sep 17 00:00:00 2001 From: Dmitry Orlov Date: Wed, 13 May 2026 13:42:09 +0200 Subject: [PATCH 8/9] fix: handle Windows drive letter in --file completion complete_mapped_file split on ':' first, which consumed the drive-letter colon (C:\...) as the host/instance separator on Windows and never reached the host-path branch. Peel the drive off first with os.path.splitdrive, matching MappedFile.parse, so segment-0 completion works on Windows. Fixes the failing test_initial_host_path test on Python 3.10-3.13 on windows-latest. --- contree_cli/shell/sources.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/contree_cli/shell/sources.py b/contree_cli/shell/sources.py index 116c98a..73af09e 100644 --- a/contree_cli/shell/sources.py +++ b/contree_cli/shell/sources.py @@ -370,16 +370,20 @@ def complete_mapped_file(text: str, ctx: CompletionContext) -> list[str]: value. Each candidate must therefore replace the whole token, not just the trailing segment. """ - parts = split_mapped_value(text) + # On Windows the host path may carry a drive prefix (``C:``) whose colon + # is part of the path, not the host/instance separator. Mirror the + # ``MappedFile.parse`` heuristic by peeling the drive off before splitting. + drive, rest = os.path.splitdrive(text) + parts = split_mapped_value(rest) head = parts[:-1] tail = parts[-1] if parts else "" - prefix = (":".join(head) + ":") if head else "" + prefix = drive + ((":".join(head) + ":") if head else "") # Segment 0: host path completion. Trailing "/" for dirs (no space) so the # user can keep typing "/foo" or ":m0" next. 
if not head: - host_candidates = complete_host_path(tail, ctx) - return [prefix + cand.rstrip(" ") for cand in host_candidates] + host_candidates = complete_host_path(drive + tail, ctx) + return [cand.rstrip(" ") for cand in host_candidates] # Subsequent segments. if tail.startswith("/") or tail == "": From 2248d08a5ad813837a53bb51ebf855dacb6ceef0 Mon Sep 17 00:00:00 2001 From: Dmitry Orlov Date: Wed, 13 May 2026 13:44:03 +0200 Subject: [PATCH 9/9] fix(build): substitute variables in ARG default values ARG B=${A} stored the literal "${A}" instead of expanding against previously declared ARGs and ENVs, so an ENV/RUN that referenced $B saw "${A}" verbatim. Match Docker BuildKit semantics by running the default through ctx.substitute at execute time. Also adds a regression test for the reported pattern: ARG APP_HOME=/opt/streamforge ENV APP_HOME=${APP_HOME} which already worked but was uncovered, plus the broken-until-now ARG-default-referencing-earlier-ARG case. --- contree_cli/docker/kw_arg.py | 2 +- tests/test_build.py | 47 ++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/contree_cli/docker/kw_arg.py b/contree_cli/docker/kw_arg.py index 72e7cd9..1f16d08 100644 --- a/contree_cli/docker/kw_arg.py +++ b/contree_cli/docker/kw_arg.py @@ -38,4 +38,4 @@ def serialize(self) -> str: def execute(self, ctx: BuildContext) -> None: ctx.declared_args.add(self.name) if self.default is not None and self.name not in ctx.arg_defaults: - ctx.arg_defaults[self.name] = self.default + ctx.arg_defaults[self.name] = ctx.substitute(self.default) diff --git a/tests/test_build.py b/tests/test_build.py index 24ab018..fb54b8c 100644 --- a/tests/test_build.py +++ b/tests/test_build.py @@ -354,6 +354,53 @@ def test_build_arg_substitutes_in_run(self, context_dir, db_path): spawn_body = json.loads(tc.get_request(1).body.decode()) assert spawn_body["command"] == "echo 2.5" + def test_arg_default_flows_into_env(self, context_dir, db_path): + 
write_dockerfile( + context_dir, + "FROM tag:ubuntu:latest\n" + "ARG APP_HOME=/opt/streamforge\n" + "ENV APP_HOME=${APP_HOME}\n" + "RUN echo $APP_HOME\n", + ) + tc = ContreeTestClient() + run_build( + tc, + BuildArgs(context=str(context_dir)), + [ + make_tag_lookup(BASE_IMG), + make_spawn(), + make_op_success(NEW_IMG), + ], + db_path, + ) + spawn_body = json.loads(tc.get_request(1).body.decode()) + assert spawn_body["command"] == "echo /opt/streamforge" + assert spawn_body["env"] == {"APP_HOME": "/opt/streamforge"} + + def test_arg_default_referencing_earlier_arg(self, context_dir, db_path): + write_dockerfile( + context_dir, + "FROM tag:ubuntu:latest\n" + "ARG ROOT=/opt\n" + "ARG APP_HOME=${ROOT}/streamforge\n" + "ENV APP_HOME=${APP_HOME}\n" + "RUN echo $APP_HOME\n", + ) + tc = ContreeTestClient() + run_build( + tc, + BuildArgs(context=str(context_dir)), + [ + make_tag_lookup(BASE_IMG), + make_spawn(), + make_op_success(NEW_IMG), + ], + db_path, + ) + spawn_body = json.loads(tc.get_request(1).body.decode()) + assert spawn_body["command"] == "echo /opt/streamforge" + assert spawn_body["env"] == {"APP_HOME": "/opt/streamforge"} + class TestSessionKey: def test_deterministic(self, tmp_path):