diff --git a/.gitignore b/.gitignore index 04e7774..7a0b252 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,10 @@ -configs +/configs gitopsctl .tmp_gitopsctl* dist/ coverage.out coverage.html +# Ignore runtime configs in test packages +internal/**/configs +# But don't ignore examples +!examples/configs diff --git a/README.md b/README.md index 3977b02..56ef24e 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# GitOpsCTL: A Lightweight GitOps Control Plane for Kubernetes +# GitOpsCTL

GitOpsCTL Logo @@ -6,292 +6,102 @@ [![Build Status](https://github.com/aeswibon/gitopsctl/actions/workflows/ci.yml/badge.svg)](https://github.com/aeswibon/gitopsctl/actions/workflows/ci.yml) [![Go Report Card](https://goreportcard.com/badge/github.com/aeswibon/gitopsctl)](https://goreportcard.com/report/github.com/aeswibon/gitopsctl) -[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) -**GitOpsCTL** (GitOps Control Tool) is a minimalistic, self-hosted, and externally managed GitOps controller written in Go. Designed to complement existing tools like ArgoCD and FluxCD, GitOpsCTL offers a simpler, more flexible alternative for Kubernetes application deployments, especially suited for smaller teams, edge environments, or scenarios requiring fine-grained external control. +GitOpsCTL is a lightweight, external GitOps control plane for Kubernetes. It watches Git repositories, renders plain YAML, Kustomize, or Helm manifests, applies them to registered clusters, and exposes a CLI, REST API, Server-Sent Events stream, Prometheus metrics, JSONL event log, webhooks, and an interactive terminal dashboard. -## Goals +Unlike in-cluster GitOps controllers, GitOpsCTL can run from a laptop, CI runner, bastion host, management VM, or container while managing one or more remote Kubernetes clusters through kubeconfig files. -The project exists to provide a **small, explicit GitOps loop**: desired state lives in Git; GitOpsCTL **watches that Git**, **applies Kubernetes manifests** to **named clusters**, and exposes a **CLI** as the main operator surface, plus **HTTP and integration hooks** so automation and **your own dashboards** can register workloads, trigger syncs, inspect status, and subscribe to events—without requiring a full in-cluster control plane such as Argo CD or Flux. 
+## What It Does -In one sentence: **GitOpsCTL keeps Kubernetes aligned with Git using a minimal external controller.** +- Watches application Git repositories on a configurable interval. +- Applies raw Kubernetes YAML, Kustomize overlays, or Helm charts. +- Supports automatic sync and manual approval workflows. +- Manages multiple clusters from one controller process. +- Restricts cluster writes to configured namespaces when `allowedNamespaces` is set. +- Decrypts SOPS-encrypted YAML, YML, and JSON manifests before apply. +- Tracks app and cluster status in local JSON config files. +- Exposes an API and TUI for operational commands and live status. +- Emits integration events to SSE, JSONL files, and HTTP webhooks. +- Publishes Prometheus metrics for syncs, cluster health, app health, Git pulls, and Kubernetes applies. -Everything beyond that loop (bundled UI, heavy plugins, webhook-primary Git ingest, advanced policy) is optional evolution **after** reconciliation, observability contracts, and operations are reliable and well documented. +## Quick Start -## Who this is for +Prerequisites: -- **Platform and DevOps engineers** who want Git-as-source-of-truth deploys with a thin controller they can run beside their existing toolchain. -- **SREs and on-call** who need logs, status, and a way to confirm what revision synced—or to kick a sync without digging through cluster internals only. -- **Small teams, edge, or local Kubernetes setups** where a lightweight external reconciler is easier to own than a large GitOps stack in-cluster. -- **Automation authors** integrating registration, sync, health checks, or future **event sinks** (webhooks, streams) from pipelines, agents, or custom backends—often alongside **`--output json`** on the CLI. - -### Who this is not for (today) - -- Teams that need **DR admission hooks** or **deep multi-tenant RBAC on the control plane** out of the box—those may land later; compare with mature GitOps products if that is your baseline. 
- -## Table of Contents - -- [🎯 Goals](#goals) -- [👥 Who this is for](#who-this-is-for) -- [🚀 Why GitOpsCTL?](#why-gitopsctl) -- [✨ Features (Phase 1)](#features-phase-1) -- [🏗️ Architecture Goals](#architecture-goals) -- [🏁 Getting Started](#getting-started) - - [Prerequisites](#prerequisites) - - [Clone the Repository](#clone-the-repository) - - [Install Dependencies & Build](#install-dependencies--build) -- [📖 Usage](#usage) - - [Register a cluster](#register-a-cluster) - - [Register an application](#register-an-application) - - [Check application status](#check-application-status) - - [Start the controller](#start-the-controller) - - [Example workflow](#example-workflow) -- [⚙️ Configuration](#configuration) -- [📂 Project structure](#project-structure) -- [➡️ Next steps (future phases)](#next-steps-future-phases) -- [Phase 2 roadmap (CLI-first & integrations)](docs/phase2.md) -- [🤝 Contributing](#contributing) -- [📄 License](#license) - -## Why GitOpsCTL? - -Traditional GitOps tools are powerful but can be resource-intensive, opinionated, or tightly coupled to the cluster they manage. GitOpsCTL addresses these concerns by being: - -- **Lightweight**: Built with Go for efficiency and minimal overhead. -- **External**: Manages deployments from outside your Kubernetes cluster(s), providing a single control plane for multiple environments. -- **GitOps-Driven**: Continuously watches Git repositories for desired state and applies changes to target clusters. -- **Complementary**: Provides a simpler reconciliation loop, allowing you to build custom deployment logic on top of a solid GitOps foundation. - -## Features (Phase 1) - -A concrete **Phase 1 checklist** (what is done vs still recommended before calling Phase 1 “complete”) lives in [docs/phase1.md](docs/phase1.md). 
- -This phase focuses on the core reconciliation loop and operational APIs: - -- **CLI for apps and clusters**: Register applications (Git URL, manifest path, poll interval, target cluster) and register multiple Kubernetes clusters (kubeconfig-backed) via command-line subcommands. -- **Git polling**: Periodically checks registered Git repositories for manifest changes. -- **Kubernetes manifest sync**: Applies YAML manifests to target cluster(s) with client-go when Git moves ahead. -- **REST API**: Manage applications and clusters and trigger sync or cluster checks over HTTP (`gitopsctl start` serves `/api/v1` by default on `:8080`; use `--api-address` to change the bind address). -- **Logging and status**: Structured logs and CLI commands to inspect registration and sync status. - -## Enterprise Hardening & Observability (Phase 4) - -GitOpsCTL has been hardened for production-grade workflows: - -- **Strict Testing Standards**: 80% unit test coverage enforced in CI and local pre-push hooks. -- **Native Helm & Kustomize Support**: Renders Helm charts and Kustomizations in-memory without needing external binaries. -- **Mozilla SOPS Integration**: Automatically decrypts secrets stored in Git using AWS KMS, GCP KMS, PGP, etc. -- **Manual Approval Workflow**: Optional `manual` sync policy to pause deployments until a human approves the commit hash. -- **Notification Webhooks**: Per-application webhooks to notify external systems (Slack, Discord, etc.) on sync status changes. -- **Prometheus Metrics**: High-resolution metrics exposed at `/metrics` for monitoring sync duration, failures, and cluster health. - -## Architecture Goals - -GitOpsCTL is built with a clear architectural vision: - -- **External Control Plane**: Operates outside the Kubernetes cluster, offering a broader view and management capabilities. -- **Reconciler Pattern**: Continuously aligns the actual state of your applications in Kubernetes with the desired state defined in Git. 
-- **Modular design**: Git operations, Kubernetes apply, reconciliation, and HTTP API are separated so each can evolve without collapsing into one blob. -- **Go-Native**: Leverages Go's concurrency model and client-go for efficient Kubernetes interactions. - -## Getting Started - -### Prerequisites - -- **Go (1.24+)**: Match `go.mod`; install Go on your system. -- **Git**: Ensure Git is installed and configured on your machine. -- **Kubernetes Cluster**: A running Kubernetes cluster. - - **For Mac users**: We highly recommend OrbStack for a fast and lightweight local Kubernetes environment. Enable Kubernetes in OrbStack's settings. - - Ensure your kubectl is configured to connect to your cluster (e.g., via ~/.kube/config). - -### Install via Homebrew (macOS / Linux) - -The easiest way to install GitOpsCTL is using Homebrew: - -```bash -brew install aeswibon/gitopsctl/gitopsctl -``` - -### Install from Source - -**Clone the Repository:** - -```bash -git clone https://github.com/aeswibon/gitopsctl.git -cd gitopsctl -``` - -### Install Dependencies & Build +- A Kubernetes cluster reachable through `kubectl`. +- A kubeconfig file for that cluster. +- GitOpsCTL installed. See [Installation](docs/installation.md). ```bash -go mod tidy -go build -o gitopsctl . +# 1. Register a cluster. +gitopsctl register-cluster \ + --name local-dev \ + --kubeconfig ~/.kube/config \ + --allowed-namespaces demo + +# 2. Register the example nginx app. +gitopsctl register-apps \ + --name nginx-demo \ + --repo https://github.com/aeswibon/gitopsctl.git \ + --branch main \ + --path examples/manifests \ + --cluster local-dev \ + --interval 30s \ + --sync-policy auto + +# 3. Start the controller and API server. +gitopsctl start --api-address :8080 + +# 4. In another terminal, open the dashboard. +gitopsctl dashboard --api-url http://127.0.0.1:8080 ``` -This will create an executable binary named gitopsctl in your current directory. 
- -## Usage - -### Register a cluster - -Applications deploy to a **named cluster** that must exist in `configs/clusters.json`. Register one first (example uses your default kubeconfig): - -```bash -./gitopsctl register-cluster \ - --name production \ - --kubeconfig ~/.kube/config +To start from checked-in sample config files instead of registering resources manually, see [Examples](examples/README.md). + +## Documentation + +- [Getting Started](docs/getting-started.md): First local sync, dashboard, status checks, and cleanup. +- [Installation](docs/installation.md): Install from releases, Go, Docker, or source. +- [Configuration](docs/configuration.md): Complete `applications.json` and `clusters.json` reference. +- [CLI Reference](docs/cli-reference.md): Commands, flags, and common workflows. +- [Architecture](docs/architecture.md): Controller, API, event bus, reconciliation, and storage model. +- [Terminal Dashboard](docs/features/tui.md): TUI views and keyboard controls. +- [Security](docs/features/security.md): Kubeconfig hygiene, namespace restrictions, RBAC, and SOPS. +- [SOPS](docs/SOPS.md): Secret encryption and decryption setup. +- [Observability](docs/features/observability.md): Metrics, events, JSONL audit logs, webhooks, and SSE. +- [Troubleshooting](docs/troubleshooting.md): Common setup and runtime failures. 
+ +## Repository Layout + +```text +cmd/ Cobra CLI commands +internal/api/ REST API, SSE stream, metrics endpoint +internal/controller/ Reconciliation loop and command dispatch +internal/core/app/ Application model and persistence +internal/core/cluster/ Cluster model and persistence +internal/core/git/ Git clone, pull, and commit helpers +internal/core/k8s/ Kubernetes client, render, apply, health logic +internal/events/ Event bus, history, stream, file, webhook sinks +internal/tui/ Bubble Tea terminal dashboard +docs/ User and architecture documentation +examples/ Runnable sample configs and manifests +configs/ Default local runtime config directory ``` -Short flags: `-n` for name, `-k` for kubeconfig. Optional: `--context`, `--test` to verify connectivity, `--dry-run`, `--force`. - -Clusters are stored in `configs/clusters.json`. - -### Register an application - -Point at a Git repo, manifest path **within that repo**, **cluster name** (must match a registered cluster), and poll interval: +## Development ```bash -./gitopsctl register-apps \ - --name my-nginx-app \ - --repo https://github.com/your-github-user/your-gitops-repo.git \ - --path k8s/manifests/nginx \ - --cluster production \ - --interval 30s +go test ./... +go test ./... -coverprofile=coverage.out +go tool cover -func=coverage.out ``` -Short flags: `-n` name, `-r` repo, `-p` path, `-c` cluster, `-i` interval. Optional: `-b`/`--branch` (default `main`), `--dry-run`, `--force`. - -After registration, `configs/applications.json` is created or updated. - -### Check application status - -Inspect registered applications (status, last synced commit, messages): - -```bash -./gitopsctl status-apps -``` - -Use flags such as `--output json`, `--details`, or `--sort-by name` for different views. 
- -### Start the controller - -Run the main controller to begin the GitOps reconciliation loop: - -```bash -./gitopsctl start -``` - -The controller starts polling registered Git repositories and applying changes to your clusters. An HTTP API is started alongside it (default listen address `:8080`; override with `--api-address`, for example `--api-address 127.0.0.1:9090`). You'll see logs in your terminal indicating activity. - -**Phase 2 — integration events (optional):** append JSON lines to a file and/or POST to a webhook so external dashboards can react: - -```bash -./gitopsctl start \ - --events-file configs/events.jsonl \ - --events-webhook https://example.com/hooks/gitops \ - --events-webhook-bearer "$TOKEN" \ - --events-webhook-secret "$HMAC_SECRET" \ - --events-webhook-retries 3 \ - --events-webhook-backoff 1s -``` - -Follow the file from another terminal: `./gitopsctl tail-events --file configs/events.jsonl`. Event schema: [docs/integrations.md](docs/integrations.md). - -**Calls into a running controller** (same as the HTTP API; set `--api-url` if the API is not at `http://127.0.0.1:8080`): - -- `./gitopsctl sync-app -n ` — request an immediate application sync. -- `./gitopsctl check-cluster -n ` — request an immediate cluster connectivity check. -- `curl -N http://127.0.0.1:8080/api/v1/events` — subscribe to live SSE events (`event` = type, `data` = envelope JSON). - -To stop the controller, press `Ctrl+C`. It performs a graceful shutdown (including the API server). - -### Example workflow - -1. **Register cluster**: `./gitopsctl register-cluster -n production -k ~/.kube/config` (add `--test` if you want a connectivity check). -2. **Register application**: `./gitopsctl register-apps -n my-nginx-app -r -p k8s/manifests/nginx -c production -i 30s`. -3. **Start**: Run `./gitopsctl start`. Observe the initial deployment of your manifests to Kubernetes. Verify with `kubectl get all -n `. -4. 
**Modify**: Change a manifest in Git (for example image tag or replicas). -5. **Commit and push**: Push to the branch your app tracks (default `main` unless you set `-b`). -6. **Observe**: Within the poll `--interval`, GitOpsCTL detects the update, pulls, and applies. Confirm with `./gitopsctl status-apps` and `kubectl`. - -## Configuration - -Application definitions are stored in `configs/applications.json`. Cluster registrations are stored in `configs/clusters.json`. You can inspect or edit these files manually, but using the CLI (or API) keeps shape and validation consistent. - -```json -[ - { - "name": "my-nginx-app", - "repoURL": "https://github.com/your-github-user/your-gitops-repo.git", - "branch": "main", - "path": "k8s/manifests/nginx", - "clusterName": "production", - "interval": "30s", - "lastSyncedGitHash": "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6q7r8s9t0", - "status": "Synced", - "message": "Successfully synced to a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6q7r8s9t0" - } -] -``` - -## Project structure - -```txt -gitopsctl/ -├── main.go # Entry: delegates to cmd -├── cmd/ # Cobra CLI (app/cluster register, list, status, start, …) -├── internal/ -│ ├── api/ # Echo HTTP server and /api/v1 handlers -│ ├── controller/ # Reconciliation loop and controller commands -│ ├── core/ # Domains: app, cluster, git, k8s (load/save, integrations) -│ ├── common/ # Shared types and validation helpers -│ └── utils/ # CLI helpers (flags, list runners, …) -└── configs/ # Created at runtime - ├── applications.json # Registered applications - └── clusters.json # Registered clusters -``` - -## Next steps (future phases) - -Development is phased; some items below already exist in code. - -### Phase 2: CLI-first operations and integrations (current direction) - -**GitOpsCTL stays a CLI tool.** We intend to expose **the full set of capabilities through the CLI** (including anything today reachable only via HTTP while `start` is running). 
The optional REST API remains a **machine interface** for automation, not a replacement for the CLI. - -**We do not plan to ship an official web dashboard.** Instead, Phase 2 focuses on **stable, listenable signals** (documented events, optional webhooks or streams, script-friendly JSON) so you can build **your own** dashboards and integrations on top. Details and suggested deliverables: [docs/phase2.md](docs/phase2.md). - -### Baseline already in the tree - -- REST API for apps and clusters under `/api/v1` when the controller is running (`gitopsctl start`). -- Multiple kubeconfig-backed clusters from one controller process. -- Git polling today; Git **push** webhooks as a sync accelerator remain a later enhancement (see Phase 2 doc). - -### Phase 3: Engines and advanced sync (no required UI) - -- Advanced sync strategies (manual approval, scheduled syncs, richer policies). -- Deeper extensibility: Helm/OCI and templating engines where they fit the architecture. -- Notification and integration patterns built on Phase 2 event hooks—not a bundled UI unless the community explicitly chooses otherwise later. - -## 🤝 Contributing - -We welcome contributions! To ensure a high bar for code quality, please follow these steps: - -1. **Local Pre-commit/Pre-push Hooks**: We use `pre-commit` to run linting and tests. Coverage checks are enforced on push. - ```bash - # Install pre-commit - pip install pre-commit - # Install the hooks (including pre-push) - pre-commit install --hook-type pre-commit --hook-type pre-push - ``` -2. **Quality Standards**: - - All code must maintain a minimum of **80% unit test coverage**. - - Ensure all tests pass (`go test ./...`) and the linter is happy (`golangci-lint run`). -3. **PR Template**: Follow the provided Pull Request template when submitting changes. +The project expects tests for new behavior and keeps coverage high across core packages. 
+## Contributing -Please review our [Contributing Guidelines](CONTRIBUTING.md) and [Code of Conduct](CODE_OF_CONDUCT.md). If you discover a security vulnerability, please see our [Security Policy](SECURITY.md). +Contributions are welcome. Please read [CONTRIBUTING.md](CONTRIBUTING.md), run the test suite, and keep docs updated when changing commands, flags, config fields, or user-visible behavior. ## License -This project is licensed under the MIT License. See the `LICENSE` file for details. +Add a repository license file before publishing or packaging GitOpsCTL for external distribution. diff --git a/coverage.html b/coverage.html deleted file mode 100644 index 2200c51..0000000 --- a/coverage.html +++ /dev/null @@ -1,6993 +0,0 @@ - - - - - - cmd: Go Coverage Report - - - -

- -
- not tracked - - not covered - covered - -
-
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - diff --git a/docs/SOPS.md b/docs/SOPS.md index fe3efd4..106aabc 100644 --- a/docs/SOPS.md +++ b/docs/SOPS.md @@ -1,43 +1,117 @@ -# SOPS Secret Management in GitOpsCTL +# SOPS Secret Management -GitOpsCTL provides native support for [SOPS (Secrets Operations)](https://github.com/getsops/sops), allowing you to store encrypted secrets in your Git repository. +GitOpsCTL can decrypt SOPS-encrypted manifests before applying them to Kubernetes. This lets you keep encrypted Secrets in Git while applying plaintext only inside the controller's temporary working directory. -## How it Works +## How Decryption Works -The GitOpsCTL controller automatically detects SOPS-encrypted files in your application's manifest directory. During each synchronization: +During each application sync: -1. The repository is cloned/pulled to a temporary directory. -2. The controller walks through the directory and identifies encrypted files (`.yaml`, `.yml`, `.json`). -3. Files containing SOPS metadata are decrypted in-place using the SOPS library. -4. The decrypted manifests are then applied to the Kubernetes cluster. -5. The temporary directory is cleaned up immediately after synchronization. +1. GitOpsCTL clones or pulls the application repository into a temporary directory. +2. It walks the configured manifest path. +3. It attempts SOPS decryption for `.yaml`, `.yml`, and `.json` files. +4. Files that are encrypted are written back decrypted inside the temporary checkout. +5. The manifest engine renders Helm, Kustomize, or raw YAML. +6. Kubernetes resources are applied. +7. The temporary checkout is removed after reconciliation. + +Unencrypted files are left unchanged. 
## Supported Providers -Since GitOpsCTL uses the SOPS library directly, it supports all providers that SOPS supports, provided the environment is configured correctly on the host running the controller: +GitOpsCTL uses the SOPS library, so it can use the providers supported by SOPS when the controller environment is configured correctly: + +- Age +- PGP +- AWS KMS +- GCP KMS +- Azure Key Vault +- HashiCorp Vault, when configured through SOPS + +## Age Example + +Create a key: + +```bash +age-keygen -o age.key +export SOPS_AGE_KEY_FILE="$PWD/age.key" +``` + +Create `.sops.yaml`: + +```yaml +creation_rules: + - path_regex: .*\.sops\.ya?ml$ + age: age1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +``` + +Encrypt a Kubernetes Secret: + +```bash +kubectl create secret generic demo-secret \ + --namespace demo \ + --from-literal=password=change-me \ + --dry-run=client \ + -o yaml > secret.yaml + +sops --encrypt secret.yaml > secret.sops.yaml +rm secret.yaml +``` + +Commit `secret.sops.yaml` and `.sops.yaml`. Do not commit `age.key`. + +Run GitOpsCTL with access to the key: + +```bash +export SOPS_AGE_KEY_FILE=/secure/path/age.key +gitopsctl start +``` + +## PGP Example + +```bash +sops --encrypt --pgp <pgp-key-fingerprint> secret.yaml > secret.sops.yaml +``` + +The controller host must have the matching private key available to GPG. + +## Cloud KMS Examples + +AWS: -- **PGP**: Ensure GPG is installed and the private key is in the keyring. -- **AWS KMS**: Ensure the controller has AWS credentials with `kms:Decrypt` permissions. -- **GCP KMS**: Ensure the controller has GCP credentials with `cloudkms.cryptoKeyVersions.useToDecrypt` permissions. -- **Azure Key Vault**: Ensure the controller is authenticated with Azure. -- **Age**: Ensure the `SOPS_AGE_KEY_FILE` or `SOPS_AGE_KEY` environment variables are set. 
+```bash +sops --encrypt --kms arn:aws:kms:us-east-1:123456789012:key/<key-id> secret.yaml > secret.sops.yaml +``` -## Configuration +GCP: -No special configuration is needed in GitOpsCTL. As long as your files are encrypted with SOPS and the environment where the controller runs has access to the decryption keys, it will work automatically. +```bash +sops --encrypt --gcp-kms projects/<project>/locations/<location>/keyRings/<key-ring>/cryptoKeys/<key> secret.yaml > secret.sops.yaml +``` -### Example: Encrypting a Secret +Azure: ```bash -# Encrypt a secret using a PGP key -sops --encrypt --pgp <pgp-key-fingerprint> secret.yaml > secret.enc.yaml +sops --encrypt --azure-kv https://<vault-name>.vault.azure.net/keys/<key-name>/<key-version> secret.yaml > secret.sops.yaml +``` + +The controller process must have decrypt permissions through its runtime identity or mounted credentials. + +## File Naming + +GitOpsCTL does not require a specific encrypted filename suffix. Any `.yaml`, `.yml`, or `.json` file containing SOPS metadata can be decrypted. + +Recommended convention: -# Encrypt using AWS KMS -sops --encrypt --kms <kms-key-arn> secret.yaml > secret.enc.yaml +```text +secret.sops.yaml +config.sops.json ``` -## Security Best Practices +## Safety Checklist -1. **Least Privilege**: Ensure the controller's identity (e.g., IAM Role, ServiceAccount) only has the minimum permissions required to decrypt the specific keys used for your GitOps repo. -2. **Key Rotation**: Regularly rotate your encryption keys. SOPS makes it easy to re-encrypt files with new keys. -3. **No Plaintext in Git**: Never commit decrypted secrets to your Git repository. Always use SOPS to encrypt them before pushing. +- Commit only encrypted secret files. +- Keep decryption keys and cloud credentials outside the repo. +- Run the controller with least-privilege decrypt permissions. +- Use Kubernetes RBAC and `allowedNamespaces` to limit blast radius. +- Rotate keys and re-encrypt secrets when access changes. +- Confirm decrypted files are not produced in your working tree before committing. 
diff --git a/docs/adr/0001-events-delivery-and-compatibility.md b/docs/adr/0001-events-delivery-and-compatibility.md deleted file mode 100644 index 347358f..0000000 --- a/docs/adr/0001-events-delivery-and-compatibility.md +++ /dev/null @@ -1,86 +0,0 @@ -# ADR 0001: Events Delivery Guarantees and Compatibility Policy - -- Status: Accepted -- Date: 2026-05-06 -- Decision owners: GitOpsCTL maintainers -- Related docs: [phase2.md](../phase2.md), [integrations.md](../integrations.md) - -## Context - -Phase 2 introduces integration events for custom dashboards and automations. External consumers need clear, stable expectations for: - -1. Delivery behavior per sink (drop/retry/order) -2. Event schema compatibility over time -3. Upgrade/deprecation policy for event types and fields - -Without explicit guarantees, integrators may assume stronger semantics than GitOpsCTL provides and build brittle receivers. - -## Decision - -### 1) Delivery guarantees by sink - -- **JSONL file sink (`--events-file`)** - - Delivery model: **best-effort append** from process memory to local file. - - Ordering: process append order. - - Durability: each record is appended and synced by the sink implementation; records already written remain after process restart. - - Failure mode: write failures are logged; processing continues. - -- **Webhook sink (`--events-webhook`)** - - Delivery model: **bounded at-least-once attempt semantics**. - - Retries: transient failures (network errors, `5xx`, `429`) retry with exponential backoff up to configured attempts. - - Non-retryable errors: most other `4xx` fail fast. - - Ordering: no global ordering guarantee across all events. - - Receiver requirement: idempotency by `id` (`X-GitOpsctl-Event-ID`). - -- **SSE stream (`GET /api/v1/events`)** - - Delivery model: **best-effort live stream only**. - - No replay/persistence guarantee. - - Slow clients may miss messages due to bounded in-memory buffers. 
- -### 2) Envelope and schema compatibility - -- Envelope field `specversion` is currently **`1.0`**. -- For `specversion: "1.0"`: - - Adding optional fields in `data` is allowed. - - Reordering JSON object fields is allowed. - - Existing field names and meanings are not changed. - - Existing event `type` strings remain stable. - -### 3) Breaking changes policy - -A change is breaking if it removes/renames fields, changes type semantics incompatibly, or changes stable event type names. - -For breaking changes: - -1. Introduce a new envelope version (for example `specversion: "2.0"`). -2. Keep old behavior available for a deprecation window of at least **2 minor releases**. -3. Document migration and examples in `docs/integrations.md`. - -### 4) Security requirements for webhooks - -- Use HTTPS in production. -- Use `--events-webhook-secret` to sign payloads. -- Receivers should verify: - - HMAC signature (`X-GitOpsctl-Signature`) - - timestamp freshness (`X-GitOpsctl-Timestamp`) - - idempotency by event id. - -## Consequences - -### Positive - -- Integrators can safely design resilient consumers (idempotent webhook handlers, tolerant SSE clients). -- Maintainers have a clear compatibility contract for future event evolution. -- Product stance remains CLI-first without coupling to a first-party dashboard. - -### Trade-offs - -- No strict exactly-once guarantee. -- SSE is not suitable as a durable queue. -- Maintaining compatibility windows increases maintenance overhead for future major changes. - -## Future considerations - -- Optional dead-letter/replay mechanism for failed webhooks. -- Explicit versioned event-type namespaces for future major revisions. -- Optional persisted event log endpoint for recovery use cases. diff --git a/docs/architecture.md b/docs/architecture.md new file mode 100644 index 0000000..e414c46 --- /dev/null +++ b/docs/architecture.md @@ -0,0 +1,150 @@ +# Architecture + +GitOpsCTL is an external GitOps controller. 
It can run outside the Kubernetes clusters it manages and uses kubeconfig files to connect to those clusters. + +## System Diagram + +```mermaid +flowchart LR + Git["Git repositories"] --> Controller["GitOpsCTL controller"] + Controller --> Renderer["YAML / Kustomize / Helm renderer"] + Renderer --> K8s["Kubernetes API servers"] + Controller --> Store["configs/*.json"] + Controller --> EventBus["Event bus"] + EventBus --> SSE["SSE stream"] + EventBus --> JSONL["JSONL event file"] + EventBus --> Webhook["Webhook sink"] + API["REST API"] --> Controller + TUI["Terminal dashboard"] --> API + CLI["API-backed CLI commands"] --> API + Prom["Prometheus"] --> Metrics["/metrics"] + Metrics --> API +``` + +## Runtime Components + +### CLI + +The CLI is built with Cobra. It handles: + +- Cluster registration and removal. +- Application registration and removal. +- Status commands. +- API-backed sync, approval, health-check, and dashboard commands. +- Controller startup. + +### Controller + +The controller owns reconciliation. On `gitopsctl start`, it: + +1. Loads `configs/applications.json`. +2. Loads `configs/clusters.json`. +3. Starts the API server. +4. Starts cluster health checking. +5. Starts a reconciliation worker for each registered app. +6. Watches the app config file for changes and reloads application definitions. + +### Git Engine + +For each app, GitOpsCTL clones or pulls the configured repository and records the latest commit hash. Sync decisions compare: + +- Latest discovered hash. +- Last successfully synced hash. +- Approved hash for manual apps. + +### Manifest Engine + +GitOpsCTL detects the application manifest mode from the configured `path`: + +- Helm chart when `Chart.yaml` or `Chart.yml` exists. +- Kustomize overlay when `kustomization.yaml`, `kustomization.yml`, or `Kustomization` exists. +- Raw YAML for recursive `.yaml` and `.yml` files otherwise. + +SOPS decryption runs before render/apply. 
+ +### Kubernetes Client + +The Kubernetes client wraps `client-go` dynamic clients and REST mapping. It: + +- Maps YAML resources to Kubernetes API resources. +- Defaults missing namespaces on namespaced resources to `default`. +- Enforces `allowedNamespaces` when configured on the target cluster. +- Creates missing resources and updates existing resources. +- Tracks applied resource metadata for later health checks. + +### API Server + +The API server exposes: + +- Application and cluster management endpoints. +- Sync, approval, and cluster check endpoints. +- Health endpoint. +- SSE event stream. +- Prometheus metrics endpoint. + +The dashboard and API-backed CLI commands use this server. + +### Event Bus + +The event bus fans out integration events to configured sinks: + +- In-memory history for API consumers. +- SSE stream for the dashboard. +- JSONL file for audit trails. +- HTTP webhook for external systems. + +## Storage Model + +GitOpsCTL intentionally uses simple JSON files as its local store: + +- `configs/applications.json` +- `configs/clusters.json` + +The controller updates status fields in these files. Back up the directory or keep generated config under infrastructure management if you run GitOpsCTL on a server. 
+ +## Reconciliation Flow + +```mermaid +sequenceDiagram + participant C as Controller + participant G as Git + participant R as Renderer + participant K as Kubernetes + participant S as Store + participant E as Event Bus + + C->>G: clone or pull repo + G-->>C: latest commit hash + C->>C: evaluate sync policy + alt manual policy not approved + C->>S: save OutOfSync status + C->>E: emit sync required event + else apply allowed + C->>R: decrypt and render manifests + R-->>C: Kubernetes objects + C->>K: create or update resources + K-->>C: apply results + C->>S: save status, hash, resources + C->>E: emit success or failure + end +``` + +## Codebase Layout + +```text +main.go Entry point +cmd/ Cobra commands +internal/api/ REST API, validation, SSE stream +internal/controller/ Reconciliation loop and command dispatch +internal/core/app/ Application model and persistence +internal/core/cluster/ Cluster model and persistence +internal/core/git/ Git operations +internal/core/k8s/ Kubernetes render, apply, health logic +internal/core/sops/ SOPS decryption helpers +internal/events/ Event envelope, bus, sinks +internal/metrics/ Prometheus metrics +internal/tui/ Bubble Tea dashboard +internal/utils/ CLI rendering helpers +docs/ Documentation +examples/ Example configs and manifests +``` diff --git a/docs/cli-reference.md b/docs/cli-reference.md new file mode 100644 index 0000000..e08ea87 --- /dev/null +++ b/docs/cli-reference.md @@ -0,0 +1,196 @@ +# CLI Reference + +This page summarizes the user-facing GitOpsCTL commands. Run `gitopsctl --help` for the exact help text from your build. + +## Global Flags + +| Flag | Default | Description | +|------|---------|-------------| +| `--api-url` | `http://127.0.0.1:8080` | Base URL used by commands that talk to a running controller API. | +| `--events-file` | empty | Append command/controller integration events as JSON lines. | +| `--events-webhook` | empty | POST integration events to an HTTP endpoint. 
| +| `--events-webhook-bearer` | empty | Bearer token for webhook requests. | +| `--events-webhook-secret` | empty | HMAC signing secret for webhook requests. | +| `--events-webhook-retries` | `2` | Retry attempts for webhook events. | +| `--events-webhook-backoff` | `750ms` | Base retry backoff. | +| `--events-webhook-timeout` | `12s` | HTTP timeout per webhook attempt. | + +Global flags come before or after the subcommand depending on Cobra parsing, but placing them before the command is the least surprising form: + +```bash +gitopsctl --api-url http://127.0.0.1:8080 sync-app --name nginx-demo +``` + +## Controller + +### `start` + +Starts the controller and API server. + +```bash +gitopsctl start --api-address :8080 +``` + +Flags: + +| Flag | Default | Description | +|------|---------|-------------| +| `--api-address`, `-a` | `:8080` | Listen address for REST API, SSE, and metrics. | + +## Applications + +### `register-apps` + +Registers an application in `configs/applications.json`. + +```bash +gitopsctl register-apps \ + --name nginx-demo \ + --repo https://github.com/aeswibon/gitopsctl.git \ + --branch main \ + --path examples/manifests \ + --cluster local-dev \ + --interval 30s \ + --sync-policy auto +``` + +Important flags: + +| Flag | Short | Required | Description | +|------|-------|----------|-------------| +| `--name` | `-n` | Yes | Application name. | +| `--repo` | `-r` | Yes | Git repository URL. | +| `--path` | `-p` | Yes | Manifest path inside the repo. | +| `--cluster` | `-c` | Yes | Registered cluster name. | +| `--branch` | `-b` | No | Git branch, default `main`. | +| `--interval` | `-i` | No | Polling interval, default `5m`. | +| `--sync-policy` | | No | `auto` or `manual`, default `auto`. | +| `--webhook-url` | | No | Per-app notification webhook. | +| `--webhook-secret` | | No | Per-app webhook signing secret. | +| `--dry-run` | | No | Preview without saving. | +| `--force` | | No | Overwrite an existing app entry. 
| + +### `list-apps` + +Lists registered applications. + +```bash +gitopsctl list-apps +``` + +### `status-apps` + +Shows application status and sync metadata. + +```bash +gitopsctl status-apps +``` + +### `sync-app` + +Requests immediate reconciliation through the running API server. + +```bash +gitopsctl --api-url http://127.0.0.1:8080 sync-app --name nginx-demo +``` + +### `approve-app` + +Approves a commit for a manual-sync application. + +```bash +gitopsctl --api-url http://127.0.0.1:8080 approve-app \ + --name nginx-demo \ + --commit <commit-hash> +``` + +### `unregister` + +Removes an application registration. + +```bash +gitopsctl unregister --name nginx-demo +``` + +## Clusters + +### `register-cluster` + +Registers a Kubernetes cluster in `configs/clusters.json`. + +```bash +gitopsctl register-cluster \ + --name local-dev \ + --kubeconfig ~/.kube/config \ + --allowed-namespaces demo +``` + +Important flags: + +| Flag | Short | Required | Description | +|------|-------|----------|-------------| +| `--name` | `-n` | Yes | Cluster name. | +| `--kubeconfig` | `-k` | No | Kubeconfig path. Auto-detected from `$KUBECONFIG` or `~/.kube/config` when omitted. | +| `--allowed-namespaces` | | No | Comma-separated namespace allow-list. | +| `--test` | | No | Validate kubeconfig loading during registration. | +| `--dry-run` | | No | Preview without saving. | +| `--force` | | No | Overwrite an existing cluster entry. | + +### `list-clusters` + +Lists registered clusters. + +```bash +gitopsctl list-clusters +``` + +### `status-clusters` + +Shows cluster connectivity status. + +```bash +gitopsctl status-clusters +``` + +### `check-cluster` + +Requests a health check through the running API server. + +```bash +gitopsctl --api-url http://127.0.0.1:8080 check-cluster --name local-dev +``` + +### `unregister-cluster` + +Removes a cluster registration.
+ +```bash +gitopsctl unregister-cluster --name local-dev +``` + +## Dashboard and Events + +### `dashboard` + +Opens the terminal dashboard. + +```bash +gitopsctl dashboard --api-url http://127.0.0.1:8080 +``` + +### `tail-events` + +Reads a JSONL event file. + +```bash +gitopsctl tail-events --file configs/events.jsonl --from-start +``` + +Useful flags: + +| Flag | Default | Description | +|------|---------|-------------| +| `--file` | `configs/events.jsonl` | Event file path. | +| `--follow` | `true` | Continue reading appended events. | +| `--from-start` | `false` | Print existing lines first. | +| `--poll-interval` | `400ms` | File polling interval. | diff --git a/docs/code_explanation.md b/docs/code_explanation.md deleted file mode 100644 index 7ede359..0000000 --- a/docs/code_explanation.md +++ /dev/null @@ -1,38 +0,0 @@ -# GitOpsCTL codebase overview - -GitOpsCTL is a Go CLI and HTTP API that reconciles **Kubernetes manifests in Git** against **registered clusters** (kubeconfig-backed). The controller runs outside the cluster: it polls Git and applies YAML using client-go. - -For setup and commands, see the [README](../README.md). For Phase 1 closure criteria, see [phase1.md](./phase1.md). For Phase 2 (events, webhooks, CLI/API parity), see [phase2.md](./phase2.md) and [integrations.md](./integrations.md). - -## Layout - -```txt -main.go → cmd.Execute() -cmd/ → Cobra commands (apps, clusters, start) -internal/api/ → Echo server, /api/v1 handlers, validator -internal/controller/ → Reconciliation loop, sync triggers, cluster checks -internal/core/app/ → Application model and persistence -internal/core/cluster/ → Cluster model and persistence -internal/core/git/ → Clone/pull/hash -internal/core/k8s/ → Apply manifests -internal/common/ → Shared types (e.g. 
API errors) -internal/events/ → Integration event envelope, JSONL + webhook sinks (Phase 2) -internal/utils/ → CLI list helpers and flags -configs/ → Runtime JSON stores (created when you register) -``` - -## Request flow (high level) - -1. **CLI or API** updates in-memory stores and persists JSON under `configs/`. -2. **`gitopsctl start`** loads apps and clusters, starts the **controller** and **API** goroutines. -3. **Controller** runs per-app reconciliation: fetch Git at `interval`, compare commit hash, apply manifests to the app’s `clusterName` kubeconfig; optionally emits **integration events** to configured sinks. -4. **Manual sync** (`API` or `sync-app` CLI) signals the controller; sync runs on the app goroutine; API returns `202 Accepted`. - -## Packages worth reading first - -| Package | Role | -|---------|------| -| `internal/controller` | Orchestration, timeouts, backoff, goroutine lifecycle | -| `internal/core/app`, `internal/core/cluster` | Source of truth structs and file I/O | -| `internal/api/server.go` | Route wiring and middleware | -| `cmd/start.go` | Process lifecycle (signals, shutdown order) | diff --git a/docs/configuration.md b/docs/configuration.md new file mode 100644 index 0000000..6add0b4 --- /dev/null +++ b/docs/configuration.md @@ -0,0 +1,166 @@ +# Configuration + +GitOpsCTL stores runtime state and user configuration in JSON files under `configs/` by default. + +| File | Purpose | +|------|---------| +| `configs/applications.json` | Registered applications, sync policy, status, Git metadata, and applied resources. | +| `configs/clusters.json` | Registered Kubernetes clusters, kubeconfig paths, status, and namespace restrictions. | + +Older examples may refer to `apps.json`. The current default application config file is `configs/applications.json`. + +## Application Configuration + +Applications are stored as a JSON array in `configs/applications.json`. 
+ +```json +[ + { + "name": "nginx-demo", + "repoURL": "https://github.com/aeswibon/gitopsctl.git", + "branch": "main", + "path": "examples/manifests", + "clusterName": "local-dev", + "interval": "30s", + "syncPolicy": "auto", + "status": "Pending", + "message": "Registered, awaiting first sync" + } +] +``` + +### Application Fields + +| Field | Required | Written by | Description | +|-------|----------|------------|-------------| +| `name` | Yes | User | Unique application name. Use DNS-friendly names such as `frontend-prod`. | +| `repoURL` | Yes | User | Git repository URL. HTTPS and SSH Git URLs are supported. | +| `branch` | No | User | Branch to watch. Defaults to `main` when registering through the CLI. | +| `path` | Yes | User | Directory inside the repo containing raw YAML, a Kustomize overlay, or a Helm chart. | +| `clusterName` | Yes | User | Name of a cluster from `configs/clusters.json`. | +| `interval` | No | User | Polling interval such as `30s`, `5m`, or `1h`. CLI validation allows 10 seconds through 24 hours. | +| `syncPolicy` | No | User | `auto` applies discovered commits. `manual` records the latest commit and waits for approval. | +| `approvedGitHash` | No | User/API | Commit approved for a manual sync. Usually set through `gitopsctl approve-app`. | +| `latestGitHash` | No | Controller | Latest commit hash discovered by the controller. | +| `lastSyncedGitHash` | No | Controller | Last commit hash successfully applied. | +| `status` | No | Controller | Current app status such as `Pending`, `Synced`, `Healthy`, `Progressing`, `Degraded`, `OutOfSync`, `Error`, or `Stopped`. | +| `message` | No | Controller | Human-readable status or error detail. | +| `consecutiveFailures` | No | Controller | Count of consecutive sync failures. | +| `webhookUrl` | No | User | Per-application sync notification webhook URL. | +| `webhookSecret` | No | User | Optional HMAC signing secret for per-application webhook notifications. 
| +| `appliedResources` | No | Controller | Kubernetes resources applied by the most recent sync, used for app health checks. | + +Use `interval`, not `pollingInterval`, in JSON. `pollingInterval` is an internal parsed duration and is not read from config. + +## Cluster Configuration + +Clusters are stored as a JSON array in `configs/clusters.json`. + +```json +[ + { + "name": "local-dev", + "kubeconfigPath": "/Users/you/.kube/config", + "registeredAt": "2026-05-10T10:00:00Z", + "status": "Pending", + "message": "Cluster registered, awaiting validation", + "allowedNamespaces": ["demo"] + } +] +``` + +### Cluster Fields + +| Field | Required | Written by | Description | +|-------|----------|------------|-------------| +| `name` | Yes | User | Unique cluster name referenced by applications. | +| `kubeconfigPath` | Yes | User | Absolute path to the kubeconfig file used by the controller. | +| `registeredAt` | No | CLI | Registration timestamp. | +| `status` | No | Controller | Cluster status such as `Pending`, `Active`, `Unreachable`, or `Error`. | +| `message` | No | Controller | Human-readable cluster status or error detail. | +| `lastCheckedAt` | No | Controller | Last cluster health check timestamp. | +| `allowedNamespaces` | No | User | Optional list of namespaces the controller may write to for this cluster. Empty means unrestricted. | + +When `allowedNamespaces` is set, namespaced manifests outside the list are rejected before apply. + +## Registering Through the CLI + +Prefer CLI registration for day-to-day use because it validates names, paths, URLs, intervals, and kubeconfig locations. 
+ +```bash +gitopsctl register-cluster \ + --name local-dev \ + --kubeconfig ~/.kube/config \ + --allowed-namespaces demo + +gitopsctl register-apps \ + --name nginx-demo \ + --repo https://github.com/aeswibon/gitopsctl.git \ + --branch main \ + --path examples/manifests \ + --cluster local-dev \ + --interval 30s \ + --sync-policy auto +``` + +## Manual Sync Policy + +Manual sync keeps Git discovery separate from cluster apply. + +1. The controller polls Git and records the latest commit hash. +2. If the latest hash is not approved, the app becomes `OutOfSync`. +3. A user approves a commit through the API or CLI. +4. The controller applies only the approved commit. + +```bash +gitopsctl approve-app --name nginx-demo --commit <commit-hash> +``` + +## Manifest Directory Detection + +For each application `path`, GitOpsCTL applies one of these modes: + +| Directory contents | Behavior | +|--------------------|----------| +| `Chart.yaml` or `Chart.yml` | Render as a Helm chart in client-only dry-run mode, then apply rendered YAML. | +| `kustomization.yaml`, `kustomization.yml`, or `Kustomization` | Build with Kustomize, then apply generated YAML. | +| Other `.yaml` or `.yml` files | Apply raw YAML files recursively. | + +SOPS-encrypted `.yaml`, `.yml`, and `.json` files are decrypted before render/apply when the controller environment has the required keys. + +## API and Event Flags + +Global flags apply before the subcommand: + +```bash +gitopsctl --api-url http://127.0.0.1:8080 sync-app --name nginx-demo +``` + +Controller flags apply to `start`: + +```bash +gitopsctl start \ + --api-address :8080 \ + --events-file configs/events.jsonl \ + --events-webhook https://example.com/gitopsctl/events \ + --events-webhook-secret "$WEBHOOK_SECRET" +``` + +| Flag | Command | Default | Description | +|------|---------|---------|-------------| +| `--api-url` | global | `http://127.0.0.1:8080` | Base URL used by API-backed commands and dashboard.
| +| `--api-address`, `-a` | `start` | `:8080` | Listen address for REST API, SSE, and metrics. | +| `--events-file` | global/start | empty | Append integration events as JSON lines. | +| `--events-webhook` | global/start | empty | POST integration events to an HTTP endpoint. | +| `--events-webhook-bearer` | global/start | empty | Bearer token for event webhook requests. | +| `--events-webhook-secret` | global/start | empty | HMAC signing secret for event webhook payloads. | +| `--events-webhook-retries` | global/start | `2` | Retry attempts for transient webhook failures. | +| `--events-webhook-backoff` | global/start | `750ms` | Base retry backoff. | +| `--events-webhook-timeout` | global/start | `12s` | HTTP timeout per webhook attempt. | + +## Configuration Hygiene + +- Use absolute kubeconfig paths in committed or copied cluster configs. +- Do not commit kubeconfigs, tokens, decrypted secrets, or webhook secrets. +- Keep `allowedNamespaces` narrow for shared clusters. +- Keep examples and documentation in sync with CLI command names and JSON field names. diff --git a/docs/features/observability.md b/docs/features/observability.md new file mode 100644 index 0000000..45164cc --- /dev/null +++ b/docs/features/observability.md @@ -0,0 +1,98 @@ +# Observability + +GitOpsCTL exposes runtime state through CLI status commands, the dashboard, Prometheus metrics, Server-Sent Events, JSONL audit logs, and HTTP webhooks. + +## Status Commands + +```bash +gitopsctl list-apps +gitopsctl status-apps +gitopsctl list-clusters +gitopsctl status-clusters +``` + +API-backed commands need a running controller: + +```bash +gitopsctl --api-url http://127.0.0.1:8080 sync-app --name nginx-demo +gitopsctl --api-url http://127.0.0.1:8080 check-cluster --name local-dev +``` + +## Dashboard + +```bash +gitopsctl dashboard --api-url http://127.0.0.1:8080 +``` + +The dashboard reads app and cluster data from the REST API and refreshes on SSE events. 
+ +## Prometheus Metrics + +The API server exposes Prometheus metrics on the same listen address as the REST API. + +```bash +curl http://127.0.0.1:8080/metrics +``` + +Key metrics: + +| Metric | Type | Labels | Meaning | +|--------|------|--------|---------| +| `gitopsctl_app_sync_total` | Counter | `app`, `cluster`, `status` | Application sync attempts by result. | +| `gitopsctl_cluster_status` | Gauge | `cluster` | `1` for reachable, `0` for unreachable/error. | +| `gitopsctl_app_sync_duration_seconds` | Histogram | `app`, `cluster` | Successful sync duration. | +| `gitopsctl_app_health_status` | Gauge | `app`, `cluster` | `1` healthy, `0.5` progressing, `0` degraded/error. | +| `gitopsctl_k8s_apply_total` | Counter | `app`, `cluster`, `kind`, `status` | Kubernetes resource apply operations. | +| `gitopsctl_git_pull_total` | Counter | `app`, `status` | Git clone/pull operations. | + +## JSONL Event Log + +Enable file events: + +```bash +gitopsctl start --events-file configs/events.jsonl +``` + +Follow events: + +```bash +gitopsctl tail-events --file configs/events.jsonl --from-start +``` + +Each line is a JSON event envelope. This is useful for local audit trails, ingestion into log pipelines, and debugging reconciliation. + +## Webhooks + +Enable event webhooks: + +```bash +gitopsctl start \ + --events-webhook https://example.com/gitopsctl/events \ + --events-webhook-bearer "$TOKEN" \ + --events-webhook-secret "$SIGNING_SECRET" \ + --events-webhook-retries 3 \ + --events-webhook-backoff 1s \ + --events-webhook-timeout 10s +``` + +When a signing secret is set, webhook requests include an HMAC SHA-256 signature header. Receivers should verify the signature before trusting the payload. + +## Server-Sent Events + +The dashboard uses the SSE stream exposed by the API server. You can inspect it manually: + +```bash +curl -N http://127.0.0.1:8080/api/v1/events +``` + +SSE is intended for live local or internal consumers. 
Use JSONL or webhooks for durable external processing. + +## Operational Checks + +For a healthy controller: + +- `/health` responds successfully. +- `/metrics` returns Prometheus text. +- `status-clusters` shows recent cluster checks. +- `status-apps` shows recent sync status and commit hashes. +- `tail-events` shows controller and sync events when event logging is enabled. diff --git a/docs/features/security.md b/docs/features/security.md new file mode 100644 index 0000000..da2e76f --- /dev/null +++ b/docs/features/security.md @@ -0,0 +1,126 @@ +# Security + +GitOpsCTL can manage powerful Kubernetes credentials, so production setups should treat it like any other deployment controller. + +## Kubeconfig Security + +Use a dedicated kubeconfig for GitOpsCTL instead of a personal admin kubeconfig. + +Recommended practices: + +- Use a dedicated Kubernetes service account. +- Grant only the verbs and resources GitOpsCTL needs. +- Scope permissions by namespace when possible. +- Store kubeconfig files outside the repository. +- Mount kubeconfigs read-only in containers. +- Rotate credentials regularly. + +## Namespace Restrictions + +GitOpsCTL supports an application-layer namespace guard through the cluster `allowedNamespaces` field. + +```json +[ + { + "name": "staging", + "kubeconfigPath": "/etc/gitopsctl/kubeconfig-staging", + "allowedNamespaces": ["staging", "monitoring"] + } +] +``` + +You can also set it with the CLI: + +```bash +gitopsctl register-cluster \ + --name staging \ + --kubeconfig /etc/gitopsctl/kubeconfig-staging \ + --allowed-namespaces staging,monitoring +``` + +Behavior: + +- Empty `allowedNamespaces` means no GitOpsCTL namespace restriction. +- Namespaced resources without `metadata.namespace` default to `default`. +- Namespaced resources outside `allowedNamespaces` are rejected before apply. +- Cluster-scoped resources are not namespace-scoped, so protect them with Kubernetes RBAC. + +This guard complements Kubernetes RBAC. 
It does not replace RBAC. + +## RBAC Example + +A minimal namespace-scoped role depends on the resources your apps manage. Start narrow and expand intentionally. + +```yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: gitopsctl + namespace: demo +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: gitopsctl-applier + namespace: demo +rules: +- apiGroups: ["", "apps", "batch", "networking.k8s.io"] + resources: ["configmaps", "secrets", "services", "deployments", "statefulsets", "daemonsets", "jobs", "cronjobs", "ingresses"] + verbs: ["get", "list", "watch", "create", "update", "patch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: gitopsctl-applier + namespace: demo +subjects: +- kind: ServiceAccount + name: gitopsctl + namespace: demo +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: gitopsctl-applier +``` + +If your manifests create namespaces or cluster-scoped resources, you need explicit cluster-level RBAC. Keep that separate from normal app deployment credentials when possible. + +## Secret Management With SOPS + +GitOpsCTL decrypts SOPS-encrypted `.yaml`, `.yml`, and `.json` files during reconciliation when the runtime environment has access to the required key material. + +Supported SOPS providers include: + +- Age +- PGP +- AWS KMS +- GCP KMS +- Azure Key Vault +- HashiCorp Vault, when configured through SOPS + +See [SOPS Secret Management](../SOPS.md) for setup details. + +## Webhook Security + +For controller event webhooks: + +```bash +gitopsctl start \ + --events-webhook https://example.com/gitopsctl/events \ + --events-webhook-bearer "$TOKEN" \ + --events-webhook-secret "$SIGNING_SECRET" +``` + +Use HTTPS endpoints, short-lived tokens when possible, and verify HMAC signatures on the receiver. + +## Configuration Hygiene + +Do not commit: + +- Kubeconfig files. +- Decrypted SOPS secrets. +- Webhook signing secrets. +- Cloud provider credentials. 
+- Private Age or PGP keys. + +It is safe to commit encrypted SOPS manifests and non-sensitive sample configs. diff --git a/docs/features/tui.md b/docs/features/tui.md new file mode 100644 index 0000000..cc30e4c --- /dev/null +++ b/docs/features/tui.md @@ -0,0 +1,82 @@ +# Terminal Dashboard + +The dashboard is an interactive terminal UI built with Bubble Tea and Lipgloss. It connects to the GitOpsCTL API server and provides a live view of applications and clusters. + +## Start the Dashboard + +Start the controller first: + +```bash +gitopsctl start --api-address :8080 +``` + +Open the dashboard in another terminal: + +```bash +gitopsctl dashboard --api-url http://127.0.0.1:8080 +``` + +## Views + +### Applications + +Shows registered applications, current sync status, cluster assignment, interval, commit hash, failure count, and message. + +Common statuses: + +- `Synced`: Last discovered/approved commit was applied. +- `Healthy`: Applied resources are currently healthy. +- `Progressing`: Applied resources are still converging. +- `Degraded`: At least one applied resource is unhealthy. +- `OutOfSync`: A newer commit exists but has not been applied, usually because manual approval is required. +- `Pending`: App is registered but has not completed reconciliation. +- `Error`: Git, render, Kubernetes, or policy failure occurred. +- `Stopped`: Reconciliation stopped because the controller shut down or the app was stopped. + +### Clusters + +Shows registered clusters, kubeconfig path, connectivity status, last check time, and status message. + +Common statuses: + +- `Active`: Cluster connectivity check succeeded. +- `Unreachable`: API server connection or discovery failed. +- `Pending`: Cluster is registered and awaiting validation. +- `Error`: Client creation or configuration failed. + +## Keyboard Controls + +| Key | Action | +|-----|--------| +| `tab`, `shift+tab` | Switch between applications and clusters. | +| `up`, `k` | Move selection up. 
| +| `down`, `j` | Move selection down. | +| `r` | Refresh app and cluster data. | +| `s` | Request sync for selected application. | +| `c` | Request health check for selected cluster. | +| `u` | Unregister selected application or cluster. | +| `y` | Confirm pending action. | +| `n`, `esc` | Cancel pending action. | +| `q`, `ctrl+c` | Quit. | + +## Live Updates + +The dashboard starts by fetching application and cluster lists through the REST API. It then listens for Server-Sent Events and refreshes when the controller emits changes. + +If the dashboard cannot connect: + +1. Confirm the controller is running. +2. Confirm `--api-url` matches the controller `--api-address`. +3. Try `curl http://127.0.0.1:8080/health`. +4. Check firewall or container port mappings. + +## When to Use CLI Instead + +The dashboard is ideal for live operations. Use CLI commands for scripts, CI, and repeatable workflows: + +```bash +gitopsctl status-apps +gitopsctl sync-app --name nginx-demo +gitopsctl approve-app --name nginx-demo --commit <commit-hash> +gitopsctl check-cluster --name local-dev +``` diff --git a/docs/getting-started.md b/docs/getting-started.md new file mode 100644 index 0000000..a63592b --- /dev/null +++ b/docs/getting-started.md @@ -0,0 +1,144 @@ +# Getting Started + +This guide walks through a local GitOpsCTL setup using the example nginx manifests in this repository. + +## Prerequisites + +- A Kubernetes cluster such as Kind, Minikube, OrbStack, Docker Desktop, or a remote dev cluster. +- `kubectl` configured for that cluster. +- GitOpsCTL installed. See [Installation](installation.md). +- Network access from the GitOpsCTL process to the Git repository and Kubernetes API server. + +Confirm Kubernetes access: + +```bash +kubectl cluster-info +kubectl get namespace default +``` + +## Option 1: Register Resources With Commands + +This path is best for learning the CLI. + +### 1.
Register a Cluster + +```bash +gitopsctl register-cluster \ + --name local-dev \ + --kubeconfig ~/.kube/config \ + --allowed-namespaces demo +``` + +Useful variants: + +```bash +# Auto-detect kubeconfig from $KUBECONFIG or ~/.kube/config. +gitopsctl register-cluster --name local-dev + +# Validate kubeconfig loading during registration. +gitopsctl register-cluster --name local-dev --kubeconfig ~/.kube/config --test + +# Preview without writing configs/clusters.json. +gitopsctl register-cluster --name local-dev --kubeconfig ~/.kube/config --dry-run +``` + +### 2. Register an Application + +```bash +gitopsctl register-apps \ + --name nginx-demo \ + --repo https://github.com/aeswibon/gitopsctl.git \ + --branch main \ + --path examples/manifests \ + --cluster local-dev \ + --interval 30s \ + --sync-policy auto +``` + +This writes the application entry to `configs/applications.json`. The controller later clones the repo, enters `examples/manifests`, decrypts SOPS files when needed, renders the manifests, and applies them to the `local-dev` cluster. + +### 3. Start the Controller + +```bash +gitopsctl start --api-address :8080 +``` + +The controller loads: + +- `configs/applications.json` +- `configs/clusters.json` + +It then starts reconciliation goroutines, the REST API, the SSE event stream, and the Prometheus metrics endpoint. + +### 4. Watch Status + +In another terminal: + +```bash +gitopsctl status-apps +gitopsctl status-clusters +gitopsctl dashboard --api-url http://127.0.0.1:8080 +``` + +You can also inspect Kubernetes directly: + +```bash +kubectl get namespace demo +kubectl get deployment,service -n demo +``` + +### 5. 
Trigger a Manual Sync + +For automatic apps, this requests an immediate reconciliation: + +```bash +gitopsctl --api-url http://127.0.0.1:8080 sync-app --name nginx-demo +``` + +For manual apps, approve the commit hash shown in app status: + +```bash +gitopsctl --api-url http://127.0.0.1:8080 approve-app \ + --name nginx-demo \ + --commit <commit-hash> +``` + +### 6. Clean Up + +```bash +kubectl delete namespace demo +gitopsctl unregister --name nginx-demo +gitopsctl unregister-cluster --name local-dev +``` + +## Option 2: Use Example Config Files + +This path is fastest when working from a local checkout. + +```bash +mkdir -p configs +cp examples/configs/apps.json configs/applications.json +cp examples/configs/clusters.json configs/clusters.json +``` + +Edit `configs/clusters.json` and set `kubeconfigPath` to the absolute path of your kubeconfig. + +Then run: + +```bash +gitopsctl start --api-address :8080 +``` + +## What Success Looks Like + +- `gitopsctl status-clusters` shows the cluster as `Active` or recently checked. +- `gitopsctl status-apps` shows `Synced` after the first successful apply. +- `kubectl get all -n demo` shows the nginx deployment and service. +- `gitopsctl dashboard` lists the app and cluster without connection errors. + +## Next Steps + +- Read [Configuration](configuration.md) for every supported field. +- Read [CLI Reference](cli-reference.md) for command workflows. +- Enable [Observability](features/observability.md) with `--events-file`, webhooks, and metrics. +- Review [Security](features/security.md) before running against shared clusters. diff --git a/docs/installation.md b/docs/installation.md new file mode 100644 index 0000000..96cee5e --- /dev/null +++ b/docs/installation.md @@ -0,0 +1,104 @@ +# Installation + +GitOpsCTL is distributed as a single Go binary and can also run in a container. + +## Requirements + +- Go 1.25 or newer when building from source. +- `git` available on the machine running the controller.
+- Network access to the target Git repositories. +- A kubeconfig that can reach each target Kubernetes cluster. +- Optional: SOPS provider tooling or credentials when using encrypted manifests. + +## Prebuilt Binary + +Download a release archive from the [GitHub Releases](https://github.com/aeswibon/gitopsctl/releases) page. + +```bash +tar -xzf gitopsctl_<os>_<arch>.tar.gz +chmod +x gitopsctl +sudo mv gitopsctl /usr/local/bin/gitopsctl +gitopsctl --help +``` + +Use the archive that matches your platform, for example Darwin arm64 for Apple Silicon macOS or Linux amd64 for most x86 Linux hosts. + +## Install With Go + +```bash +go install aeswibon.com/github/gitopsctl@latest +``` + +Make sure your Go binary directory is on `PATH`: + +```bash +export PATH="$(go env GOPATH)/bin:$PATH" +gitopsctl --help +``` + +## Build From Source + +```bash +git clone https://github.com/aeswibon/gitopsctl.git +cd gitopsctl +go build -o gitopsctl main.go +./gitopsctl --help +``` + +Run tests: + +```bash +go test ./... +``` + +Run coverage: + +```bash +go test ./... -coverprofile=coverage.out +go tool cover -func=coverage.out +``` + +## Docker + +Pull the image: + +```bash +docker pull ghcr.io/aeswibon/gitopsctl:latest +``` + +Run the controller with local configs and kubeconfig mounted: + +```bash +docker run --rm -it \ + -v "$HOME/.kube/config:/root/.kube/config:ro" \ + -v "$PWD/configs:/app/configs" \ + -p 8080:8080 \ + ghcr.io/aeswibon/gitopsctl:latest \ + start --api-address 0.0.0.0:8080 +``` + +Notes: + +- `kubeconfigPath` inside `configs/clusters.json` must match the path inside the container, such as `/root/.kube/config`. +- Mount SOPS keys or cloud credentials when encrypted manifests need decryption. +- Mount `configs/` as writable because GitOpsCTL persists status back to JSON files. + +## Shell Completion + +Cobra can generate completion scripts if enabled in the binary.
Check the current command help: + +```bash +gitopsctl completion --help +``` + +If completion is not present in your build, use normal shell history and aliases until completion support is added. + +## Verify Installation + +```bash +gitopsctl --help +gitopsctl register-cluster --help +gitopsctl start --help +``` + +Then continue with [Getting Started](getting-started.md). diff --git a/docs/integrations.md b/docs/integrations.md deleted file mode 100644 index 5ede631..0000000 --- a/docs/integrations.md +++ /dev/null @@ -1,187 +0,0 @@ -# Integrations — events and dashboards - -GitOpsCTL is **CLI-first**. For custom dashboards and automation, consume **integration events** instead of scraping unstructured logs. - -Delivery guarantees and compatibility policy are defined in [ADR 0001](./adr/0001-events-delivery-and-compatibility.md). - -## Enabling events - -When you run the controller: - -```bash -gitopsctl start \ - --events-file configs/events.jsonl \ - --events-webhook https://your-receiver.example/hooks/gitopsctl \ - --events-webhook-bearer "$TOKEN" \ - --events-webhook-secret "$HMAC_SECRET" \ - --events-webhook-retries 3 \ - --events-webhook-backoff 1s -``` - -- **`--events-file`**: append-only JSON Lines (one JSON object per line). Safe to `tail -f` or use `gitopsctl tail-events`. -- **`--events-webhook`**: each event is POSTed as `application/json`. -- **`--events-webhook-bearer`**: optional `Authorization: Bearer …` header for the webhook. -- **`--events-webhook-secret`**: optional HMAC signing key for tamper verification. -- **`--events-webhook-retries`**: transient retry attempts (network errors, HTTP `5xx`, `429`). -- **`--events-webhook-backoff`**: exponential backoff base duration between retries. -- **`--events-webhook-timeout`**: per-request HTTP timeout. - -You can enable **one or both** sinks. 
- -## Event envelope (version 1.0) - -Each record matches this shape: - -| Field | Type | Description | -|-------|------|-------------| -| `specversion` | string | Always `1.0` for this schema. | -| `id` | string | Unique id (UUID) per delivery. | -| `type` | string | Stable event type (see below). | -| `source` | string | Usually `gitopsctl-controller` (runtime) or `gitopsctl-cli` (mutating CLI commands with event sinks configured). | -| `time` | string | UTC RFC3339 timestamp. | -| `data` | object | Event-specific payload. | - -## Event types - -| `type` | When | Typical `data` fields | -|--------|------|-------------------------| -| `io.gitopsctl.controller.started` | After controller dispatches initial reconcilers | `applications`, `clusters` (counts) | -| `io.gitopsctl.controller.stopping` | Before shutdown begins | (often empty) | -| `io.gitopsctl.app.registered` | App registered or updated by API/CLI | `app`, `repoURL`, `branch`, `path`, `cluster`, `interval`, `updated` | -| `io.gitopsctl.app.unregistered` | App removed by API/CLI | `app` (+ optional metadata in CLI path) | -| `io.gitopsctl.app.sync_requested` | Manual sync requested (API or `sync-app`) | `app` | -| `io.gitopsctl.app.sync_started` | Start of a sync attempt | `app`, `cluster`, `trigger` (`initial` \| `poll` \| `manual`), `lastSyncedHash` | -| `io.gitopsctl.app.git_pull_failed` | Git clone/pull error | `app`, `cluster`, `trigger`, `error` | -| `io.gitopsctl.app.manifest_path_missing` | Manifest path missing in repo | `app`, `cluster`, `trigger`, `path` | -| `io.gitopsctl.app.apply_failed` | kubectl apply errors | `app`, `cluster`, `trigger`, `error` | -| `io.gitopsctl.app.sync_succeeded` | Manifests applied and commit recorded | `app`, `cluster`, `trigger`, `commit`, `previousCommit` | -| `io.gitopsctl.app.sync_no_changes` | Repo already at synced commit | `app`, `cluster`, `trigger`, `commit` — **not** emitted on periodic polls when nothing changed (only `manual` / `initial`) | - -Cluster 
connectivity: - -| `type` | When | Typical `data` fields | -|--------|------|-------------------------| -| `io.gitopsctl.cluster.registered` | Cluster registered or updated by API/CLI | `cluster`, `kubeconfig` | -| `io.gitopsctl.cluster.unregistered` | Cluster removed by API/CLI | `cluster` | -| `io.gitopsctl.cluster.health_check_requested` | Manual health check requested | `cluster` | -| `io.gitopsctl.cluster.health_check_completed` | After each health check run | `cluster`, `status`, `message` | - -## CLI parity with the API - -These commands call the **running controller** HTTP API (same as curl): - -- `gitopsctl sync-app -n ` → `POST /api/v1/applications/:name/sync` -- `gitopsctl check-cluster -n ` → `POST /api/v1/clusters/:name/check` - -Use `--api-url` on the root command if the API is not at `http://127.0.0.1:8080`. - -## Follow events locally - -```bash -gitopsctl tail-events --file configs/events.jsonl -``` - -## Live stream (SSE) - -For browser-friendly or backend subscribers that prefer a long-lived stream: - -```bash -curl -N http://127.0.0.1:8080/api/v1/events -``` - -SSE frames include: - -- `id`: event envelope id -- `event`: event type -- `data`: full JSON envelope - -This stream is best-effort in-memory fan-out. Slow clients may miss events and should tolerate gaps. - -## CLI scenario test scripts - -- `scripts/cli-smoke.sh` — broad command coverage + synthetic/runtime checks; safe for CI. -- `scripts/cli-real-scenario.sh` — strict real-cluster scenario (fails hard on errors), intended for local/release validation. - -## Semantics - -- **Ordering**: no strict global ordering guarantee across apps; file sink preserves append order per process. -- **Delivery**: webhook posts retry transient failures using exponential backoff, then fail best-effort if attempts are exhausted. Build idempotent receivers keyed by `id`. -- **Security**: treat webhook URLs, bearer tokens, and signing secrets as secrets; prefer HTTPS. 
- -## Webhook signing - -When `--events-webhook-secret` is set, GitOpsCTL signs each request with: - -- `X-GitOpsctl-Timestamp`: RFC3339Nano UTC timestamp -- `X-GitOpsctl-Event-ID`: envelope id -- `X-GitOpsctl-Signature`: `sha256=` - -Signature payload: - -```text -. -``` - -Use the same HMAC secret on the receiver to verify authenticity and integrity. - -### Go verification example - -```go -package main - -import ( - "crypto/hmac" - "crypto/sha256" - "crypto/subtle" - "encoding/hex" - "fmt" - "io" - "net/http" - "strings" - "time" -) - -func verifyWebhook(secret string, r *http.Request) ([]byte, error) { - sigHeader := r.Header.Get("X-GitOpsctl-Signature") // sha256= - tsHeader := r.Header.Get("X-GitOpsctl-Timestamp") // RFC3339Nano - if sigHeader == "" || tsHeader == "" { - return nil, fmt.Errorf("missing signature headers") - } - - parts := strings.SplitN(sigHeader, "=", 2) - if len(parts) != 2 || parts[0] != "sha256" { - return nil, fmt.Errorf("invalid signature format") - } - gotSig, err := hex.DecodeString(parts[1]) - if err != nil { - return nil, fmt.Errorf("bad signature hex: %w", err) - } - - // Optional replay protection window (5m). - ts, err := time.Parse(time.RFC3339Nano, tsHeader) - if err != nil { - return nil, fmt.Errorf("bad timestamp: %w", err) - } - if d := time.Since(ts); d > 5*time.Minute || d < -30*time.Second { - return nil, fmt.Errorf("timestamp outside allowed window") - } - - body, err := io.ReadAll(r.Body) - if err != nil { - return nil, err - } - - mac := hmac.New(sha256.New, []byte(secret)) - mac.Write([]byte(tsHeader)) - mac.Write([]byte(".")) - mac.Write(body) - wantSig := mac.Sum(nil) - - if subtle.ConstantTimeCompare(gotSig, wantSig) != 1 { - return nil, fmt.Errorf("signature mismatch") - } - return body, nil -} -``` - -Use `X-GitOpsctl-Event-ID` for idempotency (dedupe repeated deliveries). 
diff --git a/docs/phase1.md b/docs/phase1.md deleted file mode 100644 index ba7ebef..0000000 --- a/docs/phase1.md +++ /dev/null @@ -1,39 +0,0 @@ -# Phase 1 scope - -Phase 1 delivers a **trustworthy minimal GitOps loop** that matches the project goal: Git as desired state, external controller, named clusters, **CLI for day-to-day operations** plus an optional REST API for automation. Phase 2 clarifies **CLI-first** positioning and **integration hooks** for external dashboards; see [phase2.md](./phase2.md). - -## Already implemented (baseline) - -- Register/list/status/unregister **applications** and **clusters** (CLI). -- Persist definitions under `configs/applications.json` and `configs/clusters.json`. -- **Controller**: poll Git on an interval, detect new commits, apply YAML manifests via client-go. -- **REST API** (`gitopsctl start`, default `:8080`): CRUD-style routes for apps and clusters, manual app sync, cluster health check endpoint, `/health`. -- Logging (zap), structured API access logs, graceful shutdown on SIGINT/SIGTERM. - -## Phase 1 completion checklist (recommended) - -These items close the gap between “prototype” and something contributors and users can rely on. - -### Quality and safety - -1. **Automated tests**: unit tests for validation, config load/save, API handlers (happy paths + errors); targeted tests for Git/K8s boundaries with fakes or integration tags where feasible. -2. **CI**: run `go test ./...`, `go vet ./...`, and `go build` on every PR (e.g. GitHub Actions); pin Go version to `go.mod`. -3. **Version surfacing**: `-v` / `--version` (inject commit/tag at build time via `-ldflags`). -4. **Deterministic bootstrap**: ensure `configs/` creation and empty-file behavior are tested and documented (first-run story). - -### Operations - -5. **API hardening for real networks**: authentication option (e.g. bearer token or mTLS) **or** clear “bind to localhost only” guidance until auth lands; document threat model in README. -6. 
**Kubeconfig context**: optional `--context` on cluster registration and apply path so multi-context kubeconfigs behave predictably (today docs previously implied this; behavior should match docs). - -### Documentation and UX - -7. **Single API reference**: one markdown table or minimal OpenAPI for `/api/v1` (request/response shapes, status codes). -8. **CONTRIBUTING.md**: build, run controller locally, run tests, PR expectations. -9. **CHANGELOG.md** or tagged releases with SemVer once Phase 1 is “done.” - -### Explicitly out of scope for Phase 1 - -- Web UI, Helm/OCI-first workflows, plugin system, webhook-primary sync, advanced policy/notifications (track as Phase 2+). - -Use this file as the working definition of “Phase 1 complete” for roadmap and community discussions. diff --git a/docs/phase2.md b/docs/phase2.md deleted file mode 100644 index 34e2001..0000000 --- a/docs/phase2.md +++ /dev/null @@ -1,106 +0,0 @@ -# Phase 2 scope — CLI-first product, integrations for dashboards - -## Product stance - -**GitOpsCTL is a CLI-first tool.** The binary is the canonical way to operate the controller, manage apps and clusters, and inspect state. Anything we ship should assume operators live in the terminal, scripts, and CI— not inside a vendor-specific web UI. - -The optional HTTP server started with `gitopsctl start` is a **machine interface** for the running controller (automation, remote triggers, health probes). It is **not** the primary product surface and does not replace CLI completeness. - -**We do not ship a first-party dashboard.** Teams who want charts, multi-user consoles, or custom workflows should **listen to GitOpsCTL events** and **call the CLI or APIs** from their own services. Phase 2 is where we make that intentional and complete. - -## Principles - -1. **CLI parity**: Every meaningful operation available while the controller runs should also be available from the CLI (including triggers today exposed only via HTTP). 
Output must be script-friendly (`--output json` / stable schemas where listing matters). - -2. **Observable actions**: Important lifecycle and reconciliation events are emitted in a **stable, documented format** so external processes can subscribe—not buried only in human-oriented log lines. - -3. **Thin integration layer**: Prefer webhooks, structured streams, or exec hooks over growing an embedded UI. - -## Already helps dashboard builders (baseline) - -- Structured zap logs when the controller runs (parseable with a log pipeline). -- REST routes under `/api/v1` for apps/clusters/sync/check (usable by a backend that powers a custom UI). -- CLI list/status commands with formatting flags. - -Phase 2 closes gaps so **dashboard backends do not depend on scraping unstructured logs**. - -## Phase 2 deliverables (recommended order) - -### 1. CLI completeness and stable machine output - -- Audit HTTP handlers vs CLI: add missing commands or flags so **no capability is API-only**. -- Standardize **JSON schemas** for list/status/get outputs; document them in-repo. -- Consider a single **`gitopsctl events tail`** (or **`gitopsctl watch`**) that streams JSON lines to stdout for local tooling—optional but high leverage for “glue” dashboards. - -### 2. Event contract (for “listen and build your dashboard”) - -Define a **small versioned event envelope**, for example: - -- `specversion`, `type`, `source`, `time`, `data` (OpenTelemetry-style or CloudEvents-like—pick one and stick to it). - -Illustrative **event types** (exact names TBD in an RFC): - -- Controller lifecycle: started, stopping. -- App: registered, unregistered, sync_started, sync_succeeded, sync_failed, git_pull_failed, apply_failed. -- Cluster: registered, unregistered, health_check_completed. - -### 3. 
Pluggable sinks (how listeners attach) - -Implement **one or two** first-class sinks (avoid boiling the ocean): - -| Sink | Use case | -|------|----------| -| **HTTP webhook** | User URLs receive POST with JSON events; simplest for SaaS dashboards or serverless handlers. | -| **Append-only JSONL file** | Cheap durability and tail -f for local dev or agents. | -| **Unix socket / TCP stream** (optional) | Low-latency consumers on the same host. | - -Multiple sinks can be enabled from config (e.g. `configs/events.yaml` or flags on `start`). - -### 4. Documentation for dashboard authors - -- **Integration guide**: event types, payloads, retry semantics for webhooks, ordering guarantees (best-effort vs at-least-once—document honestly). -- Clarify security: webhook URLs often need TLS and shared secrets; document signing or static bearer tokens for callbacks. - -### 5. REST API role in Phase 2 - -- Keep `/api/v1` as **optional** automation surface. -- Align HTTP payloads with CLI JSON where possible so one schema drives both. -- Optional later: **SSE or WebSocket** on `/api/v1/events` if we want browser-adjacent consumers without webhooks—explicitly secondary to CLI + webhooks. - -## Explicitly out of scope for Phase 2 - -- Official GitOpsCTL web UI or hosted SaaS. -- Full Helm/OCI plugin ecosystem (can move to Phase 3 unless prioritized). -- Replacing polling with Git webhooks as the only sync path (can be Phase 2 stretch or Phase 3). - -## Relationship to Phase 3 - -Phase 3 can focus on **sync strategies**, **Helm/OCI**, and deeper **engine plugins**—still without requiring a built-in dashboard, unless the community later decides otherwise via governance. - ---- - -## Implementation status (ready to close) - -Shipping in-tree: - -- Integration **event envelope** (`specversion` 1.0) and controller emits for lifecycle, sync outcomes, and cluster health. -- **JSONL file** sink and optional **HTTP webhook** sink (`gitopsctl start` flags). 
-- **`gitopsctl tail-events`** for local JSONL following. -- **CLI ↔ API parity** for manual sync and cluster check: `sync-app`, `check-cluster` plus global **`--api-url`**. -- Register/unregister/requested events are emitted from both API handler flows and mutating CLI flows. -- Webhook hardening: retries + backoff + optional HMAC signing headers. - -## Phase 2 completion checklist - -- [x] CLI parity for API-only operational actions (`sync-app`, `check-cluster`) -- [x] Stable event envelope + documented event type catalog -- [x] At least two sinks (JSONL + webhook) -- [x] Dashboard integration docs, including webhook signature verification example -- [x] Webhook hardening (retry/backoff + optional signing) -- [x] Basic webhook behavior tests (retry path, non-retryable path, signing headers) -- [x] ADR for delivery guarantees and compatibility policy (versioning/deprecation) — [ADR 0001](./adr/0001-events-delivery-and-compatibility.md) -- [x] Optional stream endpoint (SSE) for browser-native consumers (`GET /api/v1/events`) - -Authoritative field list and semantics: [integrations.md](./integrations.md). Policy and compatibility guarantees: [ADR 0001](./adr/0001-events-delivery-and-compatibility.md). - -With this ADR accepted and SSE shipped, the current Phase 2 definition is complete. diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md new file mode 100644 index 0000000..1b48c50 --- /dev/null +++ b/docs/troubleshooting.md @@ -0,0 +1,180 @@ +# Troubleshooting + +This guide covers common GitOpsCTL setup and runtime issues. + +## Controller Starts With No Apps or Clusters + +Symptoms: + +- `gitopsctl start` logs that no applications or clusters are registered. + +Checks: + +```bash +ls -l configs/ +cat configs/applications.json +cat configs/clusters.json +``` + +Fix: + +- Register resources with `gitopsctl register-cluster` and `gitopsctl register-apps`. 
+- Or copy examples: + +```bash +mkdir -p configs +cp examples/configs/apps.json configs/applications.json +cp examples/configs/clusters.json configs/clusters.json +``` + +## Application Config Fails to Load + +Symptoms: + +- Error mentions invalid polling interval. +- App never appears after copying example JSON. + +Fix: + +- Use the JSON field `interval`, not `pollingInterval`. +- Use values accepted by Go durations: `30s`, `5m`, `1h`. +- Keep interval between 10 seconds and 24 hours when using the CLI. + +## Cluster Cannot Connect + +Symptoms: + +- Cluster status is `Unreachable` or `Error`. +- Syncs fail before applying manifests. + +Checks: + +```bash +kubectl --kubeconfig /path/from/config cluster-info +kubectl --kubeconfig /path/from/config auth can-i get namespaces +``` + +Fix: + +- Make `kubeconfigPath` absolute. +- If running in Docker, use the container path, not the host path. +- Confirm the API server is reachable from the GitOpsCTL runtime. +- Confirm the kubeconfig user or service account has required RBAC. + +## Namespace Is Not Allowed + +Symptoms: + +- Sync fails with a namespace allow-list error. + +Fix: + +- Add the manifest namespace to the cluster `allowedNamespaces`. +- Or change the manifest namespace. +- Or remove `allowedNamespaces` for unrestricted operation. + +Example: + +```json +{ + "name": "local-dev", + "kubeconfigPath": "/Users/you/.kube/config", + "allowedNamespaces": ["demo", "default"] +} +``` + +## Kubernetes Namespace Does Not Exist + +Symptoms: + +- Applying a Deployment, Service, or Secret fails because the namespace is missing. + +Fix: + +- Add a Namespace manifest to the app path. +- Or create the namespace separately: + +```bash +kubectl create namespace demo +``` + +## Git Clone or Pull Fails + +Symptoms: + +- App status is `Error`. +- Message starts with `Git error`. + +Fix: + +- Confirm the repo URL is valid. +- Confirm the branch exists. +- For private repos, make SSH keys or tokens available to the GitOpsCTL process. 
+- Confirm outbound network access from the runtime. + +## Manual App Is OutOfSync + +Symptoms: + +- App status is `OutOfSync`. +- Message references latest and approved commit hashes. + +Fix: + +```bash +gitopsctl status-apps +gitopsctl approve-app --name <app-name> --commit <commit-hash> +``` + +Then trigger a sync if needed: + +```bash +gitopsctl sync-app --name <app-name> +``` + +## Dashboard Cannot Connect + +Symptoms: + +- Dashboard opens with errors or empty data. + +Fix: + +```bash +curl http://127.0.0.1:8080/metrics +curl http://127.0.0.1:8080/health +gitopsctl dashboard --api-url http://127.0.0.1:8080 +``` + +- Confirm `gitopsctl start` is running. +- Confirm port mapping when using Docker. +- Match `dashboard --api-url` to `start --api-address`. + +## SOPS Decryption Fails + +Symptoms: + +- Sync fails while walking or decrypting manifest files. + +Fix: + +- Confirm the encrypted file is valid with `sops -d file.sops.yaml`. +- Confirm Age, PGP, or cloud KMS credentials are available to the controller. +- Do not commit plaintext replacement files. +- See [SOPS Secret Management](SOPS.md). + +## Event File Is Empty + +Symptoms: + +- `tail-events` shows no output. + +Fix: + +- Start the controller with `--events-file`. +- Trigger a sync or cluster check. + +```bash +gitopsctl start --events-file configs/events.jsonl +gitopsctl tail-events --file configs/events.jsonl --from-start +``` diff --git a/examples/README.md b/examples/README.md index c766e4c..cc97e48 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1,29 +1,88 @@ # GitOpsCTL Examples -This directory contains examples of how to use GitOpsCTL to manage your Kubernetes applications. +This directory contains a runnable nginx demo for GitOpsCTL. -## Quickstart Example: NGINX +## Contents -To deploy the sample NGINX application in this directory using `gitopsctl`: +```text +examples/ + configs/ + apps.json Sample application config. Copy to configs/applications.json. + clusters.json Sample cluster config. 
Copy to configs/clusters.json and edit kubeconfigPath. + manifests/ + namespace.yaml + nginx-deployment.yaml + nginx-service.yaml + secret.sops.yaml.example +``` -1. **Register your local cluster** (e.g. Docker Desktop, OrbStack, Minikube): - ```bash - gitopsctl register-cluster -n local-cluster -k ~/.kube/config - ``` +## Run the Demo From a Local Checkout -2. **Register the application**, pointing to this repository: - ```bash - gitopsctl register-apps \ - -n example-nginx \ - -r https://github.com/aeswibon/gitopsctl.git \ - -p examples/nginx \ - -c local-cluster \ - -i 30s - ``` +```bash +mkdir -p configs +cp examples/configs/apps.json configs/applications.json +cp examples/configs/clusters.json configs/clusters.json +``` -3. **Start the controller**: - ```bash - gitopsctl start - ``` +Edit `configs/clusters.json`: -The controller will poll the repository and deploy the `deployment.yaml` found in `examples/nginx` to your cluster. +- Set `kubeconfigPath` to your real kubeconfig path. +- Keep `allowedNamespaces` as `["demo"]` unless you change the manifests. + +Start GitOpsCTL: + +```bash +gitopsctl start --api-address :8080 +``` + +In another terminal: + +```bash +gitopsctl status-apps +gitopsctl status-clusters +gitopsctl dashboard --api-url http://127.0.0.1:8080 +``` + +Validate the Kubernetes resources: + +```bash +kubectl get namespace demo +kubectl get deployment,service -n demo +``` + +## Register the Same Demo With Commands + +```bash +gitopsctl register-cluster \ + --name local-dev \ + --kubeconfig ~/.kube/config \ + --allowed-namespaces demo + +gitopsctl register-apps \ + --name nginx-demo \ + --repo https://github.com/aeswibon/gitopsctl.git \ + --branch main \ + --path examples/manifests \ + --cluster local-dev \ + --interval 30s \ + --sync-policy auto +``` + +## SOPS Example + +`examples/manifests/secret.sops.yaml.example` is a template showing where encrypted SOPS metadata belongs. Do not apply it directly. To test SOPS: + +1. 
Create a normal Kubernetes Secret manifest. +2. Encrypt it with `sops`. +3. Commit the encrypted file as `.yaml`, `.yml`, or `.json`. +4. Run GitOpsCTL in an environment that has access to the matching Age, PGP, or KMS key. + +See [SOPS Secret Management](../docs/SOPS.md). + +## Cleanup + +```bash +kubectl delete namespace demo +gitopsctl unregister --name nginx-demo +gitopsctl unregister-cluster --name local-dev +``` diff --git a/examples/configs/apps.json b/examples/configs/apps.json new file mode 100644 index 0000000..e740692 --- /dev/null +++ b/examples/configs/apps.json @@ -0,0 +1,13 @@ +[ + { + "name": "nginx-demo", + "repoURL": "https://github.com/aeswibon/gitopsctl.git", + "branch": "main", + "path": "examples/manifests", + "clusterName": "local-dev", + "interval": "30s", + "syncPolicy": "auto", + "status": "Pending", + "message": "Example application registered, awaiting first sync" + } +] diff --git a/examples/configs/clusters.json b/examples/configs/clusters.json new file mode 100644 index 0000000..0323219 --- /dev/null +++ b/examples/configs/clusters.json @@ -0,0 +1,9 @@ +[ + { + "name": "local-dev", + "kubeconfigPath": "/Users/you/.kube/config", + "status": "Pending", + "message": "Example cluster registered, update kubeconfigPath before use", + "allowedNamespaces": ["demo"] + } +] diff --git a/examples/manifests/namespace.yaml b/examples/manifests/namespace.yaml new file mode 100644 index 0000000..18434a6 --- /dev/null +++ b/examples/manifests/namespace.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: demo diff --git a/examples/manifests/nginx-deployment.yaml b/examples/manifests/nginx-deployment.yaml new file mode 100644 index 0000000..253d6d0 --- /dev/null +++ b/examples/manifests/nginx-deployment.yaml @@ -0,0 +1,27 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: nginx-demo + namespace: demo +spec: + replicas: 3 + selector: + matchLabels: + app: nginx + template: + metadata: + labels: + app: nginx + spec: + 
containers: + - name: nginx + image: nginx:stable-alpine + ports: + - containerPort: 80 + resources: + requests: + cpu: "100m" + memory: "128Mi" + limits: + cpu: "200m" + memory: "256Mi" diff --git a/examples/manifests/nginx-service.yaml b/examples/manifests/nginx-service.yaml new file mode 100644 index 0000000..cc5713d --- /dev/null +++ b/examples/manifests/nginx-service.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Service +metadata: + name: nginx-service + namespace: demo +spec: + selector: + app: nginx + ports: + - protocol: TCP + port: 80 + targetPort: 80 + type: ClusterIP diff --git a/examples/manifests/secret.sops.yaml.example b/examples/manifests/secret.sops.yaml.example new file mode 100644 index 0000000..8f6e0fb --- /dev/null +++ b/examples/manifests/secret.sops.yaml.example @@ -0,0 +1,24 @@ +apiVersion: v1 +kind: Secret +metadata: + name: database-credentials + namespace: demo +type: Opaque +stringData: + db-password: ENC[AES256_GCM,data:some-encrypted-string,iv:iv-value,tag:tag-value,type:str] +sops: + kms: [] + gcp_kms: [] + azure_kv: [] + hc_vault: [] + age: + - recipient: age1example + enc: | + -----BEGIN AGE ENCRYPTED FILE----- + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBtYlV2... 
+ -----END AGE ENCRYPTED FILE----- + lastmodified: "2023-10-27T10:00:00Z" + mac: ENC[AES256_GCM,data:mac-value,iv:iv-value,tag:tag-value,type:str] + pgp: [] + unencrypted_suffix: _unencrypted + version: 3.7.3 diff --git a/examples/nginx/deployment.yaml b/examples/nginx/deployment.yaml deleted file mode 100644 index 7a5aefb..0000000 --- a/examples/nginx/deployment.yaml +++ /dev/null @@ -1,21 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: nginx-example - labels: - app: nginx-example -spec: - replicas: 2 - selector: - matchLabels: - app: nginx-example - template: - metadata: - labels: - app: nginx-example - spec: - containers: - - name: nginx - image: nginx:1.25.4 - ports: - - containerPort: 80 diff --git a/internal/controller/controller.go b/internal/controller/controller.go index 78d8b65..5af18a0 100644 --- a/internal/controller/controller.go +++ b/internal/controller/controller.go @@ -430,17 +430,25 @@ func (c *Controller) reconcileApp(appCtx context.Context, app *app.Application, var client k8sApplier - // Validate cluster exists before entering the polling loop. 
+ // Initialize client if cluster exists if app.ClusterName != "" { c.clusters.RLock() - _, clusterExists := c.clusters.Get(app.ClusterName) + cl, ok := c.clusters.Get(app.ClusterName) c.clusters.RUnlock() - if !clusterExists { + if !ok { app.Status = "Error" app.Message = fmt.Sprintf("Cluster '%s' does not exist", app.ClusterName) c.saveAppStatus(app, appConfigFile, true) return } + newClient, err := k8s.NewClientSet(c.logger, cl.KubeconfigPath, cl.AllowedNamespaces) + if err != nil { + app.Status = "Error" + app.Message = fmt.Sprintf("Failed to create k8s client: %v", err) + c.saveAppStatus(app, appConfigFile, true) + return + } + client = newClient } interval := app.PollingInterval @@ -451,7 +459,7 @@ func (c *Controller) reconcileApp(appCtx context.Context, app *app.Application, defer ticker.Stop() c.performSync(appCtx, logger, app, repoDir, client, appConfigFile, "initial") - if app.Status == "Synced" || app.Status == "Healthy" { + if (app.Status == "Synced" || app.Status == "Healthy") && client != nil { c.checkHealth(appCtx, logger, app, client, appConfigFile) } @@ -600,12 +608,13 @@ func (c *Controller) notify(application *app.Application, oldStatus, oldHash str } func (c *Controller) checkHealth(ctx context.Context, logger *zap.Logger, application *app.Application, client k8sApplier, appConfigFile string) { - oldStatus := application.Status - oldHash := application.LastSyncedGitHash - if len(application.AppliedResources) == 0 { + if client == nil || len(application.AppliedResources) == 0 { return } + oldStatus := application.Status + oldHash := application.LastSyncedGitHash + overallStatus := "Healthy" var messages []string diff --git a/scripts/cli-smoke.sh b/scripts/cli-smoke.sh index c0e137e..83f4287 100755 --- a/scripts/cli-smoke.sh +++ b/scripts/cli-smoke.sh @@ -80,7 +80,7 @@ awk '/^ [a-z0-9][a-z0-9-]+[[:space:]]+/ {print $1}' "$TESTROOT/root.help" \ ./gitopsctl unregister -n demoapp --dry-run >/dev/null # Cluster registration requires a real 
kubeconfig; ensure we fail cleanly. -./gitopsctl register-cluster -n local -k /tmp/nonexistent-kubeconfig --dry-run --force >/dev/null || true +./gitopsctl register-cluster -n local -k /tmp/nonexistent-kubeconfig --dry-run --force >/dev/null 2>&1 || true # Start controller briefly and exercise API-backed CLI actions + SSE. ./gitopsctl start --api-address 127.0.0.1:18080 --events-file "$TESTROOT/configs/events.jsonl" >"$TESTROOT/start.log" 2>&1 &