From 4b7774d32432d0d7f8c3f46079a0387e9e037b50 Mon Sep 17 00:00:00 2001 From: dvcdsys Date: Thu, 4 Jun 2026 19:49:39 +0100 Subject: [PATCH 1/4] fix(server): make GitHub webhook registration idempotent (#68) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Auto-registration always POSTed a new hook instead of checking whether one with the same delivery URL already existed, so re-adds, reindexes, and restarts accumulated duplicate hooks on a repo — GitHub then fanned every push out N times. Add githubapi.ListWebhooks + EnsureWebhook: list existing hooks, match on config.url, PATCH the match (and prune same-URL duplicates) instead of creating, and only POST when none match. Wire both registration paths (tryAutoRegisterWebhook, reconciler.reconcileOne) through it. The reconciler now reuses an existing hook even when the stored WebhookID was lost, so a reregister sweep never leaves old + new hooks side by side. Covered by new githubapi tests (create / reuse / prune-duplicates / ignore-non-matching-URL) and a reconciler reuse test. Docs updated. Co-Authored-By: Claude Opus 4.8 --- doc/WEBHOOKS.md | 20 ++- server/internal/githubapi/githubapi.go | 145 +++++++++++++++- server/internal/githubapi/githubapi_test.go | 178 ++++++++++++++++++++ server/internal/httpapi/gitrepos.go | 5 +- server/internal/tunnels/reconciler.go | 17 +- server/internal/tunnels/tunnels_test.go | 41 ++++- 6 files changed, 385 insertions(+), 21 deletions(-) diff --git a/doc/WEBHOOKS.md b/doc/WEBHOOKS.md index 1edd98f..aee62b6 100644 --- a/doc/WEBHOOKS.md +++ b/doc/WEBHOOKS.md @@ -82,10 +82,17 @@ When `webhook_mode=auto` and the PAT scope check passes: 1. Operator submits the add-repo form. The server clones the repo (`clone_repo` job) and starts indexing. -2. In parallel, the server calls `POST /repos/{owner}/{repo}/hooks` - on GitHub via `server/internal/githubapi/`. The hook payload sets - `events: ["push"]`, `content_type: json`, and embeds the - server-generated `webhook_secret`. +2. In parallel, the server registers the hook **idempotently** via + `server/internal/githubapi/` (`EnsureWebhook`): it first + `GET /repos/{owner}/{repo}/hooks` and looks for a hook whose + `config.url` already equals this server's delivery URL. If one + exists it is reused (PATCHed to refresh the secret/events) and any + extra duplicates pointing at the same URL are deleted; only when + none match does it `POST /repos/{owner}/{repo}/hooks`. The hook + payload sets `events: ["push"]`, `content_type: json`, and embeds + the server-generated `webhook_secret`. This is what prevents + duplicate hooks accumulating across re-adds, reindexes, and server + restarts (issue #68). 3. GitHub responds with the hook id. The id is stored on the `git_repos` row so a later DELETE can call `DELETE /repos/{owner}/{repo}/hooks/{id}` cleanly. @@ -154,7 +161,10 @@ On boot the server runs a one-shot audit This is also why rotating `CIX_PUBLIC_URL` should be paired with a "reregister all" sweep in the dashboard — there's no automatic -follow-up. +follow-up. The reconcile/reregister path is idempotent: a repo whose +hook id is still known is PATCHed in place; a repo whose stored id was +lost is matched by `config.url` and reused rather than re-created, so a +sweep never leaves a repo with old **and** new hooks side by side. ## 7. What gets re-indexed on a push diff --git a/server/internal/githubapi/githubapi.go b/server/internal/githubapi/githubapi.go index 52a1c8b..1472ace 100644 --- a/server/internal/githubapi/githubapi.go +++ b/server/internal/githubapi/githubapi.go @@ -73,10 +73,10 @@ func New() *Client { // repo-picker UI actually renders — so we don't bloat the JSON payload // (a single user can have several hundred repos visible via a PAT). type Repo struct { - FullName string `json:"full_name"` // "owner/name" - DefaultBranch string `json:"default_branch"` // used to auto-fill the branch input - Private bool `json:"private"` // shown as a lock icon in the dropdown - HTMLURL string `json:"html_url"` // canonical https://github.com/... form + FullName string `json:"full_name"` // "owner/name" + DefaultBranch string `json:"default_branch"` // used to auto-fill the branch input + Private bool `json:"private"` // shown as a lock icon in the dropdown + HTMLURL string `json:"html_url"` // canonical https://github.com/... form Description string `json:"description,omitempty"` Owner RepoOwner `json:"owner"` } @@ -359,11 +359,22 @@ type CreateWebhookOptions struct { Insecure bool // mostly for tests against http:// origins } -// HookResponse is the slice of the GitHub response we care about. +// HookConfig mirrors the `config` slice of a GitHub hook. We only read +// `url` — the delivery target — which is what makes registration +// idempotent: a hook whose config.url already equals our delivery URL is +// the cix hook, so we reuse it instead of POSTing a duplicate. +type HookConfig struct { + URL string `json:"url"` +} + +// HookResponse is the slice of the GitHub response we care about. The +// top-level URL is the hook's own API URL (…/hooks/{id}); Config.URL is the +// delivery target the hook POSTs to. type HookResponse struct { - ID int64 `json:"id"` - URL string `json:"url"` - Active bool `json:"active"` + ID int64 `json:"id"` + URL string `json:"url"` + Active bool `json:"active"` + Config HookConfig `json:"config"` } // CreateWebhook calls POST /repos/{owner}/{repo}/hooks. Returns the @@ -422,6 +433,124 @@ func (c *Client) CreateWebhook(ctx context.Context, opts CreateWebhookOptions) ( } } +// ListWebhooks calls GET /repos/{owner}/{repo}/hooks and returns the repo's +// configured hooks (with their delivery config.url). Used to make +// registration idempotent — callers match on config.url to find an existing +// cix hook before creating a new one. Walks Link rel=next up to maxPages +// (0 = no cap); a repo with >100 hooks is pathological, so the default +// caller passes a small ceiling. +func (c *Client) ListWebhooks(ctx context.Context, owner, repo, pat string, maxPages int) ([]HookResponse, error) { + if owner == "" || repo == "" { + return nil, fmt.Errorf("owner/repo required") + } + if pat == "" { + return nil, fmt.Errorf("PAT required") + } + out := []HookResponse{} + page := 0 + pageURL := c.BaseURL + "/repos/" + url.PathEscape(owner) + "/" + url.PathEscape(repo) + "/hooks?per_page=100" + for pageURL != "" { + page++ + req, err := http.NewRequestWithContext(ctx, http.MethodGet, pageURL, nil) + if err != nil { + return nil, err + } + c.signRequest(req, pat) + resp, err := c.HTTPClient.Do(req) + if err != nil { + return nil, fmt.Errorf("github API: %w", err) + } + body, _ := io.ReadAll(resp.Body) + resp.Body.Close() + switch resp.StatusCode { + case http.StatusOK: + var batch []HookResponse + if err := json.Unmarshal(body, &batch); err != nil { + return nil, fmt.Errorf("parse hooks page: %w", err) + } + out = append(out, batch...) + case http.StatusUnauthorized, http.StatusForbidden: + return nil, fmt.Errorf("%w: %s", ErrUnauthorized, githubMessage(body)) + case http.StatusNotFound: + return nil, fmt.Errorf("%w: %s", ErrNotFound, githubMessage(body)) + default: + return nil, fmt.Errorf("github API %d: %s", resp.StatusCode, githubMessage(body)) + } + if maxPages > 0 && page >= maxPages { + break + } + pageURL = parseNextLink(resp.Header.Get("Link")) + } + return out, nil +} + +// EnsureWebhook is the idempotent registration entry point. It lists the +// repo's existing hooks and, if one already targets opts.URL, PATCHes it in +// place (refreshing the secret/events/active flag) and returns its id; +// otherwise it POSTs a fresh hook. Any *additional* hooks pointing at the +// same delivery URL are deleted, so a repo that already accumulated +// duplicates (issue #68) converges to exactly one cix hook. +// +// The returned bool reports whether a new hook was created (false = an +// existing one was reused). Callers persist the returned id either way. +func (c *Client) EnsureWebhook(ctx context.Context, opts CreateWebhookOptions) (HookResponse, bool, error) { + hooks, err := c.ListWebhooks(ctx, opts.Owner, opts.Repo, opts.PAT, 10) + if err != nil { + return HookResponse{}, false, err + } + var matches []HookResponse + for _, h := range hooks { + if sameDeliveryURL(h.Config.URL, opts.URL) { + matches = append(matches, h) + } + } + if len(matches) == 0 { + hr, cerr := c.CreateWebhook(ctx, opts) + if cerr != nil { + return HookResponse{}, false, cerr + } + return hr, true, nil + } + + // Reuse the first match; PATCH to refresh delivery config. If GitHub + // reports it gone (raced delete between list and patch), create instead. + keep := matches[0] + hr, uerr := c.UpdateWebhook(ctx, UpdateWebhookOptions{ + Owner: opts.Owner, + Repo: opts.Repo, + PAT: opts.PAT, + HookID: keep.ID, + URL: opts.URL, + Secret: opts.Secret, + Events: opts.Events, + Insecure: opts.Insecure, + }) + if uerr != nil { + if errors.Is(uerr, ErrNotFound) { + hr2, cerr := c.CreateWebhook(ctx, opts) + if cerr != nil { + return HookResponse{}, false, cerr + } + return hr2, true, nil + } + return HookResponse{}, false, uerr + } + + // Prune any leftover duplicates so the repo ends up with one cix hook. + for _, dup := range matches[1:] { + _ = c.DeleteWebhook(ctx, opts.Owner, opts.Repo, opts.PAT, dup.ID) + } + return hr, false, nil +} + +// sameDeliveryURL compares two hook delivery URLs for idempotency. Exact +// match after trimming a trailing slash — cix builds delivery URLs +// deterministically, so anything subtler would risk treating a genuinely +// different endpoint as the same hook. +func sameDeliveryURL(a, b string) bool { + return strings.TrimRight(a, "/") == strings.TrimRight(b, "/") && a != "" +} + // UpdateWebhookOptions parameterises a hook update. HookID identifies the // existing hook; URL/Secret are the new delivery config. Events defaults // to ["push"] when nil. diff --git a/server/internal/githubapi/githubapi_test.go b/server/internal/githubapi/githubapi_test.go index 713af78..61c78aa 100644 --- a/server/internal/githubapi/githubapi_test.go +++ b/server/internal/githubapi/githubapi_test.go @@ -7,6 +7,7 @@ import ( "io" "net/http" "net/http/httptest" + "strconv" "strings" "testing" ) @@ -467,6 +468,183 @@ func TestListReposForAccountUsesCorrectEndpoint(t *testing.T) { } } +// hooksServer is a tiny stateful in-memory fake of GitHub's +// /repos/{owner}/{repo}/hooks endpoints — enough to exercise the +// list→match→create/update/delete dance EnsureWebhook performs. The map is +// id → delivery (config.url). Counters let tests assert no duplicate POSTs. +type hooksServer struct { + hooks map[int64]string + nextID int64 + posts int + patches int + deletes int +} + +func newHooksServer(t *testing.T, seed map[int64]string) (*Client, *hooksServer) { + t.Helper() + st := &hooksServer{hooks: map[int64]string{}, nextID: 100} + for id, u := range seed { + st.hooks[id] = u + if id >= st.nextID { + st.nextID = id + 1 + } + } + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + // Path is /repos/o/r/hooks or /repos/o/r/hooks/{id}. + parts := strings.Split(strings.Trim(r.URL.Path, "/"), "/") + var hookID int64 + if len(parts) == 5 { // repos/o/r/hooks/{id} + hookID, _ = strconv.ParseInt(parts[4], 10, 64) + } + switch r.Method { + case http.MethodGet: + type hook struct { + ID int64 `json:"id"` + Active bool `json:"active"` + Config HookConfig `json:"config"` + } + out := []hook{} + for id, u := range st.hooks { + out = append(out, hook{ID: id, Active: true, Config: HookConfig{URL: u}}) + } + _ = json.NewEncoder(w).Encode(out) + case http.MethodPost: + st.posts++ + raw, _ := io.ReadAll(r.Body) + var body struct { + Config struct { + URL string `json:"url"` + } `json:"config"` + } + _ = json.Unmarshal(raw, &body) + id := st.nextID + st.nextID++ + st.hooks[id] = body.Config.URL + w.WriteHeader(http.StatusCreated) + _, _ = w.Write([]byte(`{"id":` + strconv.FormatInt(id, 10) + `,"active":true,"config":{"url":"` + body.Config.URL + `"}}`)) + case http.MethodPatch: + st.patches++ + if _, ok := st.hooks[hookID]; !ok { + w.WriteHeader(http.StatusNotFound) + _, _ = w.Write([]byte(`{"message":"Not Found"}`)) + return + } + raw, _ := io.ReadAll(r.Body) + var body struct { + Config struct { + URL string `json:"url"` + } `json:"config"` + } + _ = json.Unmarshal(raw, &body) + st.hooks[hookID] = body.Config.URL + _, _ = w.Write([]byte(`{"id":` + strconv.FormatInt(hookID, 10) + `,"active":true,"config":{"url":"` + body.Config.URL + `"}}`)) + case http.MethodDelete: + st.deletes++ + delete(st.hooks, hookID) + w.WriteHeader(http.StatusNoContent) + default: + w.WriteHeader(http.StatusMethodNotAllowed) + } + })) + t.Cleanup(srv.Close) + c := New() + c.BaseURL = srv.URL + return c, st +} + +func TestEnsureWebhookCreatesWhenNoneExist(t *testing.T) { + c, st := newHooksServer(t, nil) + hr, created, err := c.EnsureWebhook(context.Background(), CreateWebhookOptions{ + Owner: "o", Repo: "r", PAT: "ghp_x", + URL: "https://cix.test/api/v1/webhooks/github/abc", Secret: "s", + }) + if err != nil { + t.Fatalf("EnsureWebhook: %v", err) + } + if !created { + t.Fatalf("expected created=true on empty repo") + } + if st.posts != 1 { + t.Fatalf("expected 1 POST, got %d", st.posts) + } + if st.hooks[hr.ID] != "https://cix.test/api/v1/webhooks/github/abc" { + t.Fatalf("hook not registered with expected url: %v", st.hooks) + } +} + +func TestEnsureWebhookReusesMatchingURL(t *testing.T) { + const url = "https://cix.test/api/v1/webhooks/github/abc" + c, st := newHooksServer(t, map[int64]string{55: url}) + hr, created, err := c.EnsureWebhook(context.Background(), CreateWebhookOptions{ + Owner: "o", Repo: "r", PAT: "ghp_x", URL: url, Secret: "s", + }) + if err != nil { + t.Fatalf("EnsureWebhook: %v", err) + } + if created { + t.Fatalf("expected created=false (reuse), got true") + } + if hr.ID != 55 { + t.Fatalf("expected reused id 55, got %d", hr.ID) + } + if st.posts != 0 { + t.Fatalf("must not POST a duplicate, got %d POSTs", st.posts) + } + if st.patches != 1 { + t.Fatalf("expected 1 PATCH to refresh config, got %d", st.patches) + } + if len(st.hooks) != 1 { + t.Fatalf("expected exactly one hook to remain, got %d", len(st.hooks)) + } +} + +func TestEnsureWebhookPrunesDuplicates(t *testing.T) { + const url = "https://cix.test/api/v1/webhooks/github/abc" + // Three identical hooks already exist (the issue #68 state). + c, st := newHooksServer(t, map[int64]string{1: url, 2: url, 3: url}) + _, created, err := c.EnsureWebhook(context.Background(), CreateWebhookOptions{ + Owner: "o", Repo: "r", PAT: "ghp_x", URL: url, Secret: "s", + }) + if err != nil { + t.Fatalf("EnsureWebhook: %v", err) + } + if created { + t.Fatalf("expected created=false, got true") + } + if st.posts != 0 { + t.Fatalf("must not POST, got %d", st.posts) + } + if st.deletes != 2 { + t.Fatalf("expected 2 duplicate deletes, got %d", st.deletes) + } + if len(st.hooks) != 1 { + t.Fatalf("repo should converge to a single hook, got %d", len(st.hooks)) + } +} + +func TestEnsureWebhookIgnoresNonMatchingURL(t *testing.T) { + // A hook for a *different* delivery URL must not be reused — that would + // hijack an unrelated webhook. EnsureWebhook creates a new one alongside. + c, st := newHooksServer(t, map[int64]string{9: "https://other.test/hook"}) + _, created, err := c.EnsureWebhook(context.Background(), CreateWebhookOptions{ + Owner: "o", Repo: "r", PAT: "ghp_x", + URL: "https://cix.test/api/v1/webhooks/github/abc", Secret: "s", + }) + if err != nil { + t.Fatalf("EnsureWebhook: %v", err) + } + if !created { + t.Fatalf("expected created=true for a non-matching existing hook") + } + if st.posts != 1 { + t.Fatalf("expected 1 POST, got %d", st.posts) + } + if len(st.hooks) != 2 { + t.Fatalf("expected both hooks to coexist, got %d", len(st.hooks)) + } +} + func TestParseOwnerRepo(t *testing.T) { cases := map[string][2]string{ "https://github.com/spf13/cobra": {"spf13", "cobra"}, diff --git a/server/internal/httpapi/gitrepos.go b/server/internal/httpapi/gitrepos.go index 4000451..4954ce4 100644 --- a/server/internal/httpapi/gitrepos.go +++ b/server/internal/httpapi/gitrepos.go @@ -602,7 +602,10 @@ func (s *Server) tryAutoRegisterWebhook(ctx context.Context, g gitrepos.GitRepo, if perr != nil { return false, "github_url is not a parseable owner/repo URL" } - hr, herr := githubapi.New().CreateWebhook(ctx, githubapi.CreateWebhookOptions{ + // EnsureWebhook is idempotent: if the repo already has a hook pointing at + // this delivery URL it reuses (and PATCHes) it instead of POSTing a + // duplicate, and prunes any accumulated duplicates (issue #68). + hr, _, herr := githubapi.New().EnsureWebhook(ctx, githubapi.CreateWebhookOptions{ Owner: owner, Repo: repo, PAT: pat, diff --git a/server/internal/tunnels/reconciler.go b/server/internal/tunnels/reconciler.go index dc3ef06..ea05ab3 100644 --- a/server/internal/tunnels/reconciler.go +++ b/server/internal/tunnels/reconciler.go @@ -33,8 +33,11 @@ type TokenRevealer interface { // WebhookClient is the GitHub webhook API surface. Satisfied by // *githubapi.Client. type WebhookClient interface { - CreateWebhook(ctx context.Context, opts githubapi.CreateWebhookOptions) (githubapi.HookResponse, error) UpdateWebhook(ctx context.Context, opts githubapi.UpdateWebhookOptions) (githubapi.HookResponse, error) + // EnsureWebhook registers idempotently: it reuses an existing hook whose + // delivery URL already matches (deduping accumulated duplicates) or + // creates one. The bool reports whether a new hook was created. + EnsureWebhook(ctx context.Context, opts githubapi.CreateWebhookOptions) (githubapi.HookResponse, bool, error) } // Reconciler re-points every webhook_mode=auto repo at the current public @@ -172,7 +175,11 @@ func (r *Reconciler) reconcileOne(ctx context.Context, g gitrepos.GitRepo, baseU r.logger.Warn("webhook gone on GitHub side, recreating", "project", g.ProjectPath) } - hr, cerr := r.gh.CreateWebhook(ctx, githubapi.CreateWebhookOptions{ + // No (usable) stored hook id: register idempotently. EnsureWebhook reuses + // a hook already pointing at this delivery URL — and prunes duplicates — + // rather than blindly POSTing a new one, which is how repos accumulated + // duplicate hooks before (issue #68). + hr, created, cerr := r.gh.EnsureWebhook(ctx, githubapi.CreateWebhookOptions{ Owner: owner, Repo: repo, PAT: pat, @@ -187,6 +194,10 @@ func (r *Reconciler) reconcileOne(ctx context.Context, g gitrepos.GitRepo, baseU if serr := r.repos.SetWebhookID(ctx, g.ProjectPath, hr.ID); serr != nil { r.logger.Warn("could not persist webhook id", "project", g.ProjectPath, "err", serr) } - out.Action = "created" + if created { + out.Action = "created" + } else { + out.Action = "updated" + } return out } diff --git a/server/internal/tunnels/tunnels_test.go b/server/internal/tunnels/tunnels_test.go index 3ed4b18..7e65350 100644 --- a/server/internal/tunnels/tunnels_test.go +++ b/server/internal/tunnels/tunnels_test.go @@ -196,12 +196,11 @@ func (fakeTokens) Touch(_ context.Context, _ string) error { return n type fakeGH struct { created, updated int failUpdateNotFound bool + // existing simulates GitHub-side hooks keyed by delivery URL, so + // EnsureWebhook can model the idempotent reuse path. + existing map[string]int64 } -func (f *fakeGH) CreateWebhook(_ context.Context, _ githubapi.CreateWebhookOptions) (githubapi.HookResponse, error) { - f.created++ - return githubapi.HookResponse{ID: 999}, nil -} func (f *fakeGH) UpdateWebhook(_ context.Context, opts githubapi.UpdateWebhookOptions) (githubapi.HookResponse, error) { if f.failUpdateNotFound { return githubapi.HookResponse{}, fmt.Errorf("%w: gone", githubapi.ErrNotFound) @@ -209,6 +208,14 @@ func (f *fakeGH) UpdateWebhook(_ context.Context, opts githubapi.UpdateWebhookOp f.updated++ return githubapi.HookResponse{ID: opts.HookID}, nil } +func (f *fakeGH) EnsureWebhook(_ context.Context, opts githubapi.CreateWebhookOptions) (githubapi.HookResponse, bool, error) { + if id, ok := f.existing[opts.URL]; ok { + f.updated++ + return githubapi.HookResponse{ID: id, Config: githubapi.HookConfig{URL: opts.URL}}, false, nil + } + f.created++ + return githubapi.HookResponse{ID: 999, Config: githubapi.HookConfig{URL: opts.URL}}, true, nil +} func i64(v int64) *int64 { return &v } @@ -256,6 +263,32 @@ func TestReconcileRecreatesWhenHookGone(t *testing.T) { } } +func TestReconcileReusesExistingHookWithoutStoredID(t *testing.T) { + // A repo with no stored WebhookID but an existing hook already pointing + // at the delivery URL must be reused (updated), not duplicated. This is + // the idempotency guarantee of issue #68. + repos := &fakeRepos{repos: []gitrepos.GitRepo{ + {ProjectPath: "github.com/o/a@main", PathHash: "aaa", GitHubURL: "https://github.com/o/a", TokenID: "t1", WebhookSecret: "s", WebhookMode: gitrepos.WebhookModeAuto}, + }} + delivery := "https://x.trycloudflare.com" + WebhookPathPrefix + "aaa" + gh := &fakeGH{existing: map[string]int64{delivery: 77}} + r := NewReconciler(repos, fakeTokens{}, gh, nil) + + res, err := r.Reconcile(context.Background(), "https://x.trycloudflare.com") + if err != nil { + t.Fatalf("Reconcile: %v", err) + } + if res.Created != 0 || res.Updated != 1 { + t.Fatalf("expected reuse (updated), got created=%d updated=%d (%+v)", res.Created, res.Updated, res.Outcomes) + } + if gh.created != 0 { + t.Fatalf("must not POST a duplicate hook, got created=%d", gh.created) + } + if repos.setIDs["github.com/o/a@main"] != 77 { + t.Fatalf("reused hook id should be persisted, got %v", repos.setIDs) + } +} + func TestReconcileNoBaseURLIsNoOp(t *testing.T) { repos := &fakeRepos{repos: []gitrepos.GitRepo{ {ProjectPath: "github.com/o/a@main", PathHash: "aaa", GitHubURL: "https://github.com/o/a", TokenID: "t1", WebhookMode: gitrepos.WebhookModeAuto, WebhookID: i64(42)}, From e7994c79f3c6ec3c3571ece645d4a55ea661f62d Mon Sep 17 00:00:00 2001 From: dvcdsys Date: Thu, 4 Jun 2026 19:56:13 +0100 Subject: [PATCH 2/4] fix(server): prune duplicates on the EnsureWebhook race-create path; doc caveat MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address PR review: - When the matched hook is deleted between list and PATCH (404), the create-replacement branch now also prunes any other same-URL duplicates we listed, instead of leaving them behind. - WEBHOOKS.md §6: note that a reconcile sweep does NOT prune pre-existing same-URL duplicates when the stored hook id is still valid (it PATCHes that one and returns); re-adding the repo collapses them via EnsureWebhook. Co-Authored-By: Claude Opus 4.8 --- doc/WEBHOOKS.md | 7 +++++++ server/internal/githubapi/githubapi.go | 6 ++++++ 2 files changed, 13 insertions(+) diff --git a/doc/WEBHOOKS.md b/doc/WEBHOOKS.md index aee62b6..cf72aa0 100644 --- a/doc/WEBHOOKS.md +++ b/doc/WEBHOOKS.md @@ -166,6 +166,13 @@ hook id is still known is PATCHed in place; a repo whose stored id was lost is matched by `config.url` and reused rather than re-created, so a sweep never leaves a repo with old **and** new hooks side by side. +One caveat for repos that *already* accumulated same-URL duplicates +before this fix: when the stored hook id is still valid, reconcile +PATCHes that one hook and returns — it does **not** list and prune the +sibling duplicates. Re-adding the repo (which routes through the +`EnsureWebhook` list→match→prune path) collapses them back to a single +hook; a plain reconcile sweep does not. + ## 7. What gets re-indexed on a push Each accepted `push` enqueues a `clone_repo` job, which: diff --git a/server/internal/githubapi/githubapi.go b/server/internal/githubapi/githubapi.go index 1472ace..81ac762 100644 --- a/server/internal/githubapi/githubapi.go +++ b/server/internal/githubapi/githubapi.go @@ -527,10 +527,16 @@ func (c *Client) EnsureWebhook(ctx context.Context, opts CreateWebhookOptions) ( }) if uerr != nil { if errors.Is(uerr, ErrNotFound) { + // matches[0] was deleted out from under us between list and + // patch. Create a replacement, but still prune any other + // same-URL duplicates we listed so we don't leave them behind. hr2, cerr := c.CreateWebhook(ctx, opts) if cerr != nil { return HookResponse{}, false, cerr } + for _, dup := range matches[1:] { + _ = c.DeleteWebhook(ctx, opts.Owner, opts.Repo, opts.PAT, dup.ID) + } return hr2, true, nil } return HookResponse{}, false, uerr From 13ae903f8eaec83ed1d6e2ba52e4e17ece8f9f7b Mon Sep 17 00:00:00 2001 From: dvcdsys Date: Thu, 4 Jun 2026 23:34:42 +0100 Subject: [PATCH 3/4] feat(cli): support custom HTTP request headers per server (#59) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Self-hosting cix behind an authenticating reverse proxy / Zero-Trust gateway (Cloudflare Access, oauth2-proxy, Authelia) blocks the CLI and AI-agent tooling at the edge: they send only the cix Bearer and have no way to satisfy the proxy off-VPN, so they get a 302/403 before reaching cix. The browser dashboard passes via interactive SSO and is unaffected. Add opt-in per-server custom headers attached to every outbound request (including the previously header-less /health probe) in addition to the cix Bearer — e.g. a Cloudflare Access service token. The proxy validates and strips them at the edge, so cix needs no knowledge of the proxy. No server-side changes. - config: ServerEntry.Headers (sensitive, omitempty) + SetServerHeader / UnsetServerHeader + validateHeader (RFC 7230 token names, anti-CRLF). - CLI: `cix config set/unset server..header.`; values never echoed; `cix config show` and TUI surface a count only. - client: SetCustomHeaders + applyCustomHeaders, applied in do(), Health() and the streaming path, always BEFORE cix-managed headers so a stray config can't clobber Authorization. - getClient: ${ENV}-expand values into a throwaway copy (never written back), validate after expansion, fail without echoing the value. - docs: CLI_CONFIG.md section + SECURITY_DEPLOYMENT.md cross-link. Tests: config round-trip/validation, client apply + Authorization precedence, and an e2e getClient test proving ${ENV} reaches the wire while the config file keeps the placeholder. Co-Authored-By: Claude Opus 4.8 --- cli/cmd/config.go | 64 ++++++++++++-- cli/cmd/headers_test.go | 90 ++++++++++++++++++++ cli/cmd/root.go | 17 ++++ cli/internal/client/client.go | 36 +++++++- cli/internal/client/client_test.go | 109 ++++++++++++++++++++++++ cli/internal/client/index.go | 2 + cli/internal/config/config.go | 87 +++++++++++++++++++ cli/internal/config/headers_test.go | 124 ++++++++++++++++++++++++++++ cli/internal/config/tui/sections.go | 19 +++-- doc/CLI_CONFIG.md | 69 ++++++++++++++-- doc/SECURITY_DEPLOYMENT.md | 12 +++ 11 files changed, 607 insertions(+), 22 deletions(-) create mode 100644 cli/cmd/headers_test.go create mode 100644 cli/internal/client/client_test.go create mode 100644 cli/internal/config/headers_test.go diff --git a/cli/cmd/config.go b/cli/cmd/config.go index f3353f5..73a9e50 100644 --- a/cli/cmd/config.go +++ b/cli/cmd/config.go @@ -35,10 +35,15 @@ Run 'cix config keys' to list every settable key with its description, default, and env-var override. Beyond those schema keys, three patterns manage the multi-server layout: - server..url URL of a named server (creates the entry if absent) - server..key API key of a named server - default_server which server is used when --server is omitted - api.url / api.key legacy aliases — operate on the default server + server..url URL of a named server (creates the entry if absent) + server..key API key of a named server + server..header. custom HTTP header sent on every request + default_server which server is used when --server is omitted + api.url / api.key legacy aliases — operate on the default server + +Custom headers let the CLI pass an authenticating reverse proxy in front of +cix (e.g. a Cloudflare Access service token). Values support ${ENV} expansion +at request time, so secrets stay out of the config file. List-valued keys (e.g. watcher.exclude) use comma-separated input with REPLACE semantics: 'cix config set watcher.exclude "node_modules,vendor"' @@ -49,6 +54,10 @@ Examples: cix config set server.corporate.key cix_abc123... cix config set default_server corporate + # custom headers (e.g. Cloudflare Access service token): + cix config set server.corporate.header.CF-Access-Client-Id ".access" + cix config set server.corporate.header.CF-Access-Client-Secret '${CIX_CF_ACCESS_SECRET}' + cix config set api.url http://localhost:21847 # legacy alias cix config set api.key cix_abc123... # legacy alias @@ -65,12 +74,14 @@ var configUnsetCmd = &cobra.Command{ Long: `Remove configuration entries. Supported keys: - server. - remove the named server entirely - server..key - clear the named server's API key + server. - remove the named server entirely + server..key - clear the named server's API key + server..header. - remove a custom HTTP header Examples: cix config unset server.corporate - cix config unset server.corporate.key`, + cix config unset server.corporate.key + cix config unset server.corporate.header.CF-Access-Client-Id`, Args: cobra.ExactArgs(1), RunE: runConfigUnset, } @@ -159,7 +170,13 @@ func renderServersBlock(w io.Writer, cfg *config.Config) { if s.Name == cfg.DefaultServer { marker = "* " } - fmt.Fprintf(w, "%s%-16s url=%s key=%s\n", marker, s.Name, s.URL, keyStatus) + fmt.Fprintf(w, "%s%-16s url=%s key=%s", marker, s.Name, s.URL, keyStatus) + // Surface custom headers by COUNT only — values may be secrets and must + // never be printed. Omitted entirely when none are set. + if n := len(s.Headers); n > 0 { + fmt.Fprintf(w, " headers=%d", n) + } + fmt.Fprintln(w) } } @@ -242,6 +259,16 @@ func runConfigSet(cmd *cobra.Command, args []string) error { fmt.Printf("✓ Set %s (server %q)\n", key, name) return nil case strings.HasPrefix(key, "server."): + // Header form: server..header. (HeaderName may itself + // contain dots, so it is everything after the 3rd segment). + if name, headerName, ok := parseServerHeaderKey(key); ok { + if err := config.SetServerHeader(name, headerName, value); err != nil { + return err + } + // Never echo the value — header values may be secrets. + fmt.Printf("✓ Set server.%s.header.%s\n", name, headerName) + return nil + } name, field, perr := parseServerKey(key) if perr != nil { return perr @@ -282,6 +309,15 @@ func runConfigUnset(cmd *cobra.Command, args []string) error { return fmt.Errorf("unknown unset key: %s (supported: server., server..key)", key) } + // Header form: server..header.. + if name, headerName, ok := parseServerHeaderKey(key); ok { + if err := config.UnsetServerHeader(name, headerName); err != nil { + return err + } + fmt.Printf("✓ Removed header %q for server %q\n", headerName, name) + return nil + } + rest := strings.TrimPrefix(key, "server.") switch { case strings.HasSuffix(rest, ".key"): @@ -319,6 +355,18 @@ func defaultServerName(cfg *config.Config) string { return config.DefaultServerName } +// parseServerHeaderKey recognises the `server..header.` form +// and splits it into the server name and header name. HeaderName is everything +// after the literal `header.` segment, so it may itself contain dots. Returns +// ok=false for any other shape (so callers fall through to parseServerKey). +func parseServerHeaderKey(key string) (name, headerName string, ok bool) { + parts := strings.SplitN(key, ".", 4) + if len(parts) != 4 || parts[0] != "server" || parts[1] == "" || parts[2] != "header" || parts[3] == "" { + return "", "", false + } + return parts[1], parts[3], true +} + // parseServerKey splits a `server..` config key into its name and // field (url|key), validating the shape. func parseServerKey(key string) (name, field string, err error) { diff --git a/cli/cmd/headers_test.go b/cli/cmd/headers_test.go new file mode 100644 index 0000000..31798b2 --- /dev/null +++ b/cli/cmd/headers_test.go @@ -0,0 +1,90 @@ +package cmd + +import ( + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/anthropics/code-index/cli/internal/config" +) + +// TestGetClient_ExpandsHeaderEnvVars is the end-to-end proof of issue #59: +// a configured header with a ${VAR} placeholder is expanded at request time +// and reaches the wire, while the on-disk config keeps the placeholder (no +// plaintext secret persisted). +func TestGetClient_ExpandsHeaderEnvVars(t *testing.T) { + isolateConfig(t) + + var got http.Header + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + got = r.Header.Clone() + w.WriteHeader(http.StatusOK) + })) + defer srv.Close() + + if err := config.SetServerURL(config.DefaultServerName, srv.URL); err != nil { + t.Fatal(err) + } + if err := config.SetServerKey(config.DefaultServerName, "k"); err != nil { + t.Fatal(err) + } + if err := config.SetServerHeader(config.DefaultServerName, "CF-Access-Client-Secret", "${CIX_TEST_SECRET}"); err != nil { + t.Fatal(err) + } + t.Setenv("CIX_TEST_SECRET", "expanded-secret-123") + withFlags(t, "", "", "") + + c, err := getClient() + if err != nil { + t.Fatalf("getClient: %v", err) + } + if err := c.Health(); err != nil { + t.Fatalf("Health: %v", err) + } + + if got.Get("CF-Access-Client-Secret") != "expanded-secret-123" { + t.Errorf("header on wire = %q, want expanded-secret-123", got.Get("CF-Access-Client-Secret")) + } + + // The config file must still hold the placeholder, not the secret. + raw, err := os.ReadFile(filepath.Join(os.Getenv("HOME"), ".cix", "config.yaml")) + if err != nil { + t.Fatalf("read config: %v", err) + } + if strings.Contains(string(raw), "expanded-secret-123") { + t.Errorf("secret leaked into config file:\n%s", raw) + } + if !strings.Contains(string(raw), "${CIX_TEST_SECRET}") { + t.Errorf("config should keep the ${CIX_TEST_SECRET} placeholder:\n%s", raw) + } +} + +// TestGetClient_InvalidHeaderErrors ensures a malformed header (after env +// expansion) fails getClient loudly and never echoes the value. +func TestGetClient_InvalidHeaderErrors(t *testing.T) { + isolateConfig(t) + if err := config.SetServerURL(config.DefaultServerName, "http://localhost:21847"); err != nil { + t.Fatal(err) + } + if err := config.SetServerKey(config.DefaultServerName, "k"); err != nil { + t.Fatal(err) + } + // Header value resolves to one containing CRLF via the env var (the config + // setter itself would reject a literal CRLF, but expansion happens later). + if err := config.SetServerHeader(config.DefaultServerName, "X-Bad", "${CIX_TEST_BAD}"); err != nil { + t.Fatal(err) + } + t.Setenv("CIX_TEST_BAD", "line1\r\nInjected: 1") + withFlags(t, "", "", "") + + _, err := getClient() + if err == nil { + t.Fatal("expected getClient to reject a CRLF-injected header value") + } + if strings.Contains(err.Error(), "Injected") { + t.Errorf("error must not echo the header value: %v", err) + } +} diff --git a/cli/cmd/root.go b/cli/cmd/root.go index 08ae2b3..c749e0e 100644 --- a/cli/cmd/root.go +++ b/cli/cmd/root.go @@ -193,6 +193,23 @@ func getClient() (*client.Client, error) { } c := client.New(url, key) + + // Custom headers: ${ENV}-expand each value into a local copy (never + // written back to disk, like the url/key overrides above) so secrets can + // stay in the environment rather than in config.yaml. Validate after + // expansion; on a bad header fail loudly WITHOUT echoing the value. + if len(srv.Headers) > 0 { + expanded := make(map[string]string, len(srv.Headers)) + for name, raw := range srv.Headers { + val := os.ExpandEnv(raw) + if err := config.ValidateHeader(name, val); err != nil { + return nil, fmt.Errorf("invalid custom header %q for server %q: %w", name, srv.Name, err) + } + expanded[name] = val + } + c.SetCustomHeaders(expanded) + } + if cfg.Indexing.StreamingIdleTimeoutSec > 0 { c.SetStreamingIdleTimeout(time.Duration(cfg.Indexing.StreamingIdleTimeoutSec) * time.Second) } diff --git a/cli/internal/client/client.go b/cli/internal/client/client.go index 9bd9b1b..b60beb4 100644 --- a/cli/internal/client/client.go +++ b/cli/internal/client/client.go @@ -21,6 +21,12 @@ type Client struct { apiKey string httpClient *http.Client + // customHeaders are user-configured headers attached to every outbound + // request (in addition to the cix Bearer) so the client can satisfy an + // authenticating reverse proxy in front of cix. Values are already + // ${ENV}-expanded and validated by the caller (getClient). Never logged. + customHeaders map[string]string + // streamingClient is used for endpoints that return chunked NDJSON // (currently only POST /index/files when Accept advertises x-ndjson). // Timeout is 0 because the natural duration of an indexing batch is @@ -50,6 +56,22 @@ func New(baseURL, apiKey string) *Client { } } +// SetCustomHeaders configures extra headers attached to every request. The +// map is used as-is (values must already be expanded/validated by the caller). +// Passing nil or an empty map is a no-op — current behavior is preserved. +func (c *Client) SetCustomHeaders(h map[string]string) { + c.customHeaders = h +} + +// applyCustomHeaders attaches the configured custom headers to req. It is +// always called BEFORE the cix-managed headers (Authorization, Content-Type, +// Accept) are set, so a stray config value can never clobber authentication. +func (c *Client) applyCustomHeaders(req *http.Request) { + for k, v := range c.customHeaders { + req.Header.Set(k, v) + } +} + // SetStreamingIdleTimeout overrides the silence threshold for streaming // endpoints. Pass 0 to disable the watchdog entirely (not recommended). func (c *Client) SetStreamingIdleTimeout(d time.Duration) { @@ -77,6 +99,8 @@ func (c *Client) do(method, path string, body interface{}) (*http.Response, erro return nil, fmt.Errorf("create request: %w", err) } + // Custom headers first, then cix-managed headers — so the latter win. + c.applyCustomHeaders(req) req.Header.Set("Authorization", "Bearer "+c.apiKey) if body != nil { req.Header.Set("Content-Type", "application/json") @@ -192,9 +216,17 @@ func parseResponse(resp *http.Response, v interface{}) error { return nil } -// Health checks if the API server is running +// Health checks if the API server is running. Although /health is public on +// cix, the request must still carry any custom headers — behind an +// authenticating reverse proxy the probe would otherwise be bounced (302/403) +// at the edge before reaching cix. func (c *Client) Health() error { - resp, err := c.httpClient.Get(c.baseURL + "/health") + req, err := http.NewRequest(http.MethodGet, c.baseURL+"/health", nil) + if err != nil { + return fmt.Errorf("health check failed: %w", err) + } + c.applyCustomHeaders(req) + resp, err := c.httpClient.Do(req) if err != nil { return fmt.Errorf("health check failed: %w", err) } diff --git a/cli/internal/client/client_test.go b/cli/internal/client/client_test.go new file mode 100644 index 0000000..840a4e2 --- /dev/null +++ b/cli/internal/client/client_test.go @@ -0,0 +1,109 @@ +package client + +import ( + "net/http" + "net/http/httptest" + "testing" +) + +// TestDoSendsCustomHeaders verifies the authenticated request path attaches +// configured custom headers in addition to the cix Bearer. +func TestDoSendsCustomHeaders(t *testing.T) { + var got http.Header + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + got = r.Header.Clone() + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{}`)) + })) + defer srv.Close() + + c := New(srv.URL, "realkey") + c.SetCustomHeaders(map[string]string{ + "CF-Access-Client-Id": "abc.access", + "CF-Access-Client-Secret": "s3cr3t", + }) + + resp, err := c.do("GET", "/api/v1/status", nil) + if err != nil { + t.Fatalf("do: %v", err) + } + resp.Body.Close() + + if got.Get("CF-Access-Client-Id") != "abc.access" { + t.Errorf("CF-Access-Client-Id = %q, want abc.access", got.Get("CF-Access-Client-Id")) + } + if got.Get("CF-Access-Client-Secret") != "s3cr3t" { + t.Errorf("CF-Access-Client-Secret = %q, want s3cr3t", got.Get("CF-Access-Client-Secret")) + } + if got.Get("Authorization") != "Bearer realkey" { + t.Errorf("Authorization = %q, want Bearer realkey", got.Get("Authorization")) + } +} + +// TestHealthSendsCustomHeaders verifies the /health probe also carries custom +// headers — otherwise it would be bounced at an authenticating proxy. +func TestHealthSendsCustomHeaders(t *testing.T) { + var got http.Header + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + got = r.Header.Clone() + w.WriteHeader(http.StatusOK) + })) + defer srv.Close() + + c := New(srv.URL, "k") + c.SetCustomHeaders(map[string]string{"CF-Access-Client-Id": "abc.access"}) + + if err := c.Health(); err != nil { + t.Fatalf("Health: %v", err) + } + if got.Get("CF-Access-Client-Id") != "abc.access" { + t.Errorf("/health missing custom header; got %q", got.Get("CF-Access-Client-Id")) + } +} + +// TestCustomHeadersDoNotOverrideAuthorization ensures a stray custom +// "Authorization" cannot clobber the cix Bearer (cix-managed headers win). +func TestCustomHeadersDoNotOverrideAuthorization(t *testing.T) { + var got http.Header + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + got = r.Header.Clone() + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{}`)) + })) + defer srv.Close() + + c := New(srv.URL, "realkey") + c.SetCustomHeaders(map[string]string{"Authorization": "Bearer EVIL"}) + + resp, err := c.do("GET", "/api/v1/status", nil) + if err != nil { + t.Fatalf("do: %v", err) + } + resp.Body.Close() + + if got.Get("Authorization") != "Bearer realkey" { + t.Errorf("Authorization = %q, want Bearer realkey (custom must not win)", got.Get("Authorization")) + } +} + +// TestNoCustomHeadersIsCleanRequest confirms the opt-in nature: with none set, +// only the cix-managed headers go out. +func TestNoCustomHeadersIsCleanRequest(t *testing.T) { + var got http.Header + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + got = r.Header.Clone() + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{}`)) + })) + defer srv.Close() + + c := New(srv.URL, "k") + resp, err := c.do("GET", "/api/v1/status", nil) + if err != nil { + t.Fatalf("do: %v", err) + } + resp.Body.Close() + if got.Get("Authorization") != "Bearer k" { + t.Errorf("Authorization = %q, want Bearer k", got.Get("Authorization")) + } +} diff --git a/cli/internal/client/index.go b/cli/internal/client/index.go index 69ded20..6917962 100644 --- a/cli/internal/client/index.go +++ b/cli/internal/client/index.go @@ -157,6 +157,8 @@ func (c *Client) SendFilesStreaming( streamCancel() return nil, fmt.Errorf("create request: %w", err) } + // Custom headers first, then cix-managed headers — so the latter win. + c.applyCustomHeaders(req) req.Header.Set("Content-Type", "application/json") req.Header.Set("Accept", "application/x-ndjson") if c.apiKey != "" { diff --git a/cli/internal/config/config.go b/cli/internal/config/config.go index 7f6dde2..f0662aa 100644 --- a/cli/internal/config/config.go +++ b/cli/internal/config/config.go @@ -40,6 +40,14 @@ type ServerEntry struct { Name string `yaml:"name" desc:"Server alias"` URL string `yaml:"url" desc:"Base URL of the cix server" validate:"omitempty,url"` Key string `yaml:"key" desc:"API key (bearer token)" sensitive:"true"` + // Headers are extra HTTP headers attached to every request the CLI makes + // to this server (in addition to the cix Bearer). They let the client + // satisfy an authenticating reverse proxy / Zero-Trust gateway in front + // of cix — e.g. a Cloudflare Access service token + // (CF-Access-Client-Id / CF-Access-Client-Secret). Values support + // ${ENV} expansion at request time so secrets stay out of the file. + // Opt-in: absent = current behavior. + Headers map[string]string `yaml:"headers,omitempty" sensitive:"true" desc:"Extra HTTP headers sent on every request (values support ${ENV} expansion)"` } type APIConfig struct { @@ -306,6 +314,85 @@ func SetServerKey(name, key string) error { return Save(cfg) } +// SetServerHeader sets (or creates) a custom HTTP header on the named server +// and persists. The header name must be a valid HTTP token and the value must +// not contain CR/LF (header-injection guard). Creates the server entry if it +// does not exist yet, mirroring SetServerURL/SetServerKey. +func SetServerHeader(name, headerName, value string) error { + if err := validateServerName(name); err != nil { + return err + } + if err := validateHeader(headerName, value); err != nil { + return err + } + cfg, err := Load() + if err != nil { + return err + } + upsertServer(cfg, name, func(s *ServerEntry) { + if s.Headers == nil { + s.Headers = map[string]string{} + } + s.Headers[headerName] = value + }) + return Save(cfg) +} + +// UnsetServerHeader removes a custom header from the named server and persists. +// Missing server or missing header is a no-op (the post-condition — header +// absent — already holds). +func UnsetServerHeader(name, headerName string) error { + cfg, err := Load() + if err != nil { + return err + } + s, ok := cfg.GetServer(name) + if !ok { + return fmt.Errorf("server %q not found", name) + } + if s.Headers == nil { + return nil + } + delete(s.Headers, headerName) + if len(s.Headers) == 0 { + // Drop the empty map so it round-trips out of the YAML (omitempty). + s.Headers = nil + } + return Save(cfg) +} + +// isHeaderTokenChar reports whether r is allowed in an HTTP field-name per +// RFC 7230 §3.2.6 (token). Used to reject malformed / injection-prone names. +func isHeaderTokenChar(r rune) bool { + switch { + case r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z', r >= '0' && r <= '9': + return true + } + return strings.ContainsRune("!#$%&'*+-.^_`|~", r) +} + +// validateHeader checks a custom header name/value pair. The name must be a +// non-empty RFC 7230 token; the value must not embed CR or LF (which would let +// a config value inject additional headers or split the request). +func validateHeader(name, value string) error { + if name == "" { + return fmt.Errorf("header name must not be empty") + } + for _, r := range name { + if !isHeaderTokenChar(r) { + return fmt.Errorf("invalid character %q in header name %q (must be a valid HTTP token)", r, name) + } + } + if strings.ContainsAny(value, "\r\n") { + return fmt.Errorf("header %q value must not contain CR or LF", name) + } + return nil +} + +// ValidateHeader is the exported guard for callers outside this package (e.g. +// getClient, after ${ENV} expansion). Same rules as the internal check. +func ValidateHeader(name, value string) error { return validateHeader(name, value) } + // SetDefaultServer marks an existing server as the default and persists. func SetDefaultServer(name string) error { cfg, err := Load() diff --git a/cli/internal/config/headers_test.go b/cli/internal/config/headers_test.go new file mode 100644 index 0000000..a3fde83 --- /dev/null +++ b/cli/internal/config/headers_test.go @@ -0,0 +1,124 @@ +package config + +import ( + "os" + "path/filepath" + "strings" + "testing" +) + +func TestSetServerHeaderRoundTrip(t *testing.T) { + withIsolatedHome(t) + if err := SetServerURL("corp", "https://corp.example"); err != nil { + t.Fatal(err) + } + if err := SetServerHeader("corp", "CF-Access-Client-Id", "abc.access"); err != nil { + t.Fatalf("SetServerHeader: %v", err) + } + if err := SetServerHeader("corp", "CF-Access-Client-Secret", "${CIX_SECRET}"); err != nil { + t.Fatalf("SetServerHeader: %v", err) + } + + ResetForTesting() + cfg, err := Load() + if err != nil { + t.Fatalf("reload: %v", err) + } + s, ok := cfg.GetServer("corp") + if !ok { + t.Fatal("server corp missing after reload") + } + if got := s.Headers["CF-Access-Client-Id"]; got != "abc.access" { + t.Errorf("CF-Access-Client-Id = %q, want abc.access", got) + } + // Secrets are stored verbatim (the ${ENV} placeholder), never expanded on disk. + if got := s.Headers["CF-Access-Client-Secret"]; got != "${CIX_SECRET}" { + t.Errorf("secret header = %q, want literal ${CIX_SECRET}", got) + } + + // And the placeholder must actually be on disk, not a resolved value. + raw, err := os.ReadFile(filepath.Join(os.Getenv("HOME"), ".cix", "config.yaml")) + if err != nil { + t.Fatalf("read config: %v", err) + } + if !strings.Contains(string(raw), "${CIX_SECRET}") { + t.Errorf("config file should retain the ${CIX_SECRET} placeholder:\n%s", raw) + } +} + +func TestUnsetServerHeader(t *testing.T) { + withIsolatedHome(t) + if err := SetServerURL("corp", "https://corp.example"); err != nil { + t.Fatal(err) + } + if err := SetServerHeader("corp", "X-One", "1"); err != nil { + t.Fatal(err) + } + if err := SetServerHeader("corp", "X-Two", "2"); err != nil { + t.Fatal(err) + } + if err := UnsetServerHeader("corp", "X-One"); err != nil { + t.Fatalf("UnsetServerHeader: %v", err) + } + cfg, _ := Load() + s, _ := cfg.GetServer("corp") + if _, ok := s.Headers["X-One"]; ok { + t.Error("X-One should be removed") + } + if s.Headers["X-Two"] != "2" { + t.Error("X-Two should remain") + } + + // Removing the last header drops the map entirely (omitempty round-trip). + if err := UnsetServerHeader("corp", "X-Two"); err != nil { + t.Fatal(err) + } + cfg, _ = Load() + s, _ = cfg.GetServer("corp") + if s.Headers != nil { + t.Errorf("Headers should be nil after last removal, got %v", s.Headers) + } + + // Unsetting a missing header / server is a no-op / clean error. + if err := UnsetServerHeader("corp", "X-Missing"); err != nil { + t.Errorf("unset missing header should be no-op, got %v", err) + } + if err := UnsetServerHeader("nope", "X"); err == nil { + t.Error("unset on missing server should error") + } +} + +func TestValidateHeader(t *testing.T) { + cases := []struct { + name, value string + ok bool + }{ + {"CF-Access-Client-Id", "abc.access", true}, + {"X-Token", "${VAR}", true}, + {"Content.Type", "x", true}, // dot is a valid token char + {"", "x", false}, // empty name + {"Bad Name", "x", false}, // space in name + {"Bad:Name", "x", false}, // colon in name + {"X-Inject", "a\r\nEvil: 1", false}, // CRLF in value + {"X-LF", "a\nb", false}, // LF in value + } + for _, c := range cases { + err := ValidateHeader(c.name, c.value) + if c.ok && err != nil { + t.Errorf("ValidateHeader(%q,…) unexpected error: %v", c.name, err) + } + if !c.ok && err == nil { + t.Errorf("ValidateHeader(%q,…) expected error, got nil", c.name) + } + } +} + +func TestSetServerHeaderRejectsInvalid(t *testing.T) { + withIsolatedHome(t) + if err := SetServerHeader("corp", "Bad Name", "v"); err == nil { + t.Error("expected error for invalid header name") + } + if err := SetServerHeader("corp", "X", "a\r\nb"); err == nil { + t.Error("expected error for CRLF in value") + } +} diff --git a/cli/internal/config/tui/sections.go b/cli/internal/config/tui/sections.go index 52c4950..15b3853 100644 --- a/cli/internal/config/tui/sections.go +++ b/cli/internal/config/tui/sections.go @@ -29,11 +29,11 @@ type row struct { type rowKind int const ( - rowKindInert rowKind = iota // no action on Enter - rowKindScalarEdit // Enter opens text input; SetByPath on save - rowKindBoolToggle // space/x flips bool; Enter does too - rowKindServerEdit // Enter opens server URL/key editor - rowKindDefaultPick // Enter cycles default_server alias + rowKindInert rowKind = iota // no action on Enter + rowKindScalarEdit // Enter opens text input; SetByPath on save + rowKindBoolToggle // space/x flips bool; Enter does too + rowKindServerEdit // Enter opens server URL/key editor + rowKindDefaultPick // Enter cycles default_server alias ) // rowsFor returns the rendered rows for the currently selected section. @@ -70,9 +70,16 @@ func serverRows(cfg *config.Config) []row { if s.Key != "" { keyStatus = "(set)" } + value := fmt.Sprintf("%s key %s", s.URL, keyStatus) + // Surface custom headers by COUNT only — values may be secrets and are + // never shown in the TUI. Edit them via `cix config set + // server..header.` or by hand in config.yaml. + if n := len(s.Headers); n > 0 { + value += fmt.Sprintf(" headers %d", n) + } out = append(out, row{ label: fmt.Sprintf("%s %s", marker, s.Name), - value: fmt.Sprintf("%s key %s", s.URL, keyStatus), + value: value, kind: rowKindServerEdit, serverIdx: i, }) diff --git a/doc/CLI_CONFIG.md b/doc/CLI_CONFIG.md index eef97b9..ab4b580 100644 --- a/doc/CLI_CONFIG.md +++ b/doc/CLI_CONFIG.md @@ -33,7 +33,7 @@ preferences and have no env binding. | `cix config show` | Human-readable dump of the current configuration | | `cix config keys` | List every settable key with default, env, description | | `cix config set ` | Set one key — supports scalars + comma-separated lists | -| `cix config unset server.[.key]` | Remove a server entry or just clear its key | +| `cix config unset server.[.key\|.header.]` | Remove a server entry, clear its key, or drop one custom header | | `cix config edit` | Interactive TUI form (huh) for the whole file | | `cix config init` | First-run wizard — same form, pre-seeded for fresh installs | | `cix config path` | Print the file path (useful in scripts) | @@ -48,6 +48,7 @@ preferences and have no env binding. | `default_server` | string | `default` | Active alias when `--server`/`CIX_SERVER` are unset | | `server..url` | string | — | URL of a named server (creates the entry on first set) | | `server..key` | string | — | API key for the named server (sensitive — never printed) | +| `server..header.` | string | — | Custom HTTP header sent on every request (sensitive — never printed; values support `${ENV}` expansion). See [Custom request headers](#custom-request-headers-auth-behind-a-reverse-proxy) | | `api.url` / `api.key` | string | — | Legacy aliases — operate on the default server | ### File watcher @@ -72,6 +73,56 @@ preferences and have no env binding. |----------------------------|-------------|-------------| | `projects` | list | Managed via `cix init` / dashboard — not editable via `config set` | +## Custom request headers (auth behind a reverse proxy) + +When cix is self-hosted behind an authenticating reverse proxy / Zero-Trust +gateway (Cloudflare Access, oauth2-proxy, Authelia, an SSO/mTLS-terminating +load balancer, a corporate proxy), the proxy decides whether a request even +reaches cix. The browser dashboard passes it via interactive SSO, but the CLI +and AI-agent tooling send only the cix Bearer and would be bounced at the edge +(302 to login / 403) off-VPN. + +Per-server **custom headers** let the CLI attach whatever the proxy needs on +**every** request (including the `/health` probe) — e.g. a Cloudflare Access +service token — *in addition* to the cix Bearer. The proxy validates and +strips those headers at the edge, so the cix origin still only sees the Bearer +— **cix needs no knowledge of the proxy**. Opt-in: with no headers configured, +behavior is unchanged. + +```yaml +servers: + - name: default + url: https://cix.example.com + key: cix_xxx + headers: # attached to every request + CF-Access-Client-Id: ".access" + CF-Access-Client-Secret: "${CIX_CF_ACCESS_SECRET}" # ${ENV} expansion +``` + +Set / remove them from the command line: + +```bash +cix config set server.default.header.CF-Access-Client-Id ".access" +cix config set server.default.header.CF-Access-Client-Secret '${CIX_CF_ACCESS_SECRET}' +cix config unset server.default.header.CF-Access-Client-Id +``` + +Notes: + +- **`${ENV}` expansion** happens at request time into a throwaway copy — the + config file keeps the literal `${CIX_CF_ACCESS_SECRET}`, so no plaintext + secret is persisted. Use `${VAR}` / `$VAR`; quote the value in your shell so + it isn't expanded before cix sees it. +- **Never printed.** Header values are sensitive: `cix config show` and + `cix config edit` (TUI) surface only the **count** (`headers=N` / `headers N`), + never names or values. Edit them via `cix config set` or by hand in + `~/.cix/config.yaml` — the TUI does not edit individual headers. +- **Safety.** cix-managed headers (`Authorization`, `Content-Type`, `Accept`) + always win, so a custom header can never clobber authentication. Header names + must be valid HTTP tokens and values may not contain CR/LF. +- For human users a short-lived per-user proxy login is preferable; service + tokens suit machine/agent clients. Both just need the client to send headers. + ## Env vars | Variable | Overrides | Notes | @@ -101,8 +152,14 @@ preferences and have no env binding. - **TUI**: [`charmbracelet/huh`](https://github.com/charmbracelet/huh) builds the paged form. The Charm stack (`huh` + `bubbletea` + `lipgloss`) is the visual layer for any future TUI screens too. -- **Sensitive fields**: `ServerEntry.Key` carries `sensitive:"true"`. - Renderers print `(set)` / `(not set)`, never the value. CodeQL's - `go/clear-text-logging` heuristic flags reads of `*Key`/`*Secret` - into named variables, so the tag is read off `reflect.StructField` - and the value goes through `reflect.Value.IsZero()` only. +- **Sensitive fields**: `ServerEntry.Key` and `ServerEntry.Headers` carry + `sensitive:"true"`. Renderers print `(set)` / `(not set)` for the key and a + header **count** only, never a value. CodeQL's `go/clear-text-logging` + heuristic flags reads of `*Key`/`*Secret` into named variables, so the tag is + read off `reflect.StructField` and the value goes through + `reflect.Value.IsZero()` only. +- **Custom headers**: `ServerEntry.Headers` is attached to every request by + `client.applyCustomHeaders` (called before the cix-managed headers so those + win). `${ENV}` expansion + validation happen in `getClient` + (`cli/cmd/root.go`) into a local copy that is never written back — mirroring + the `--api-url`/`--api-key` override pattern. diff --git a/doc/SECURITY_DEPLOYMENT.md b/doc/SECURITY_DEPLOYMENT.md index c387239..6259e97 100644 --- a/doc/SECURITY_DEPLOYMENT.md +++ b/doc/SECURITY_DEPLOYMENT.md @@ -56,6 +56,18 @@ plain HTTP for the upstream hop, the auto-detection will return false and - Or configure the proxy to make the upstream hop look TLS-marked — the details vary; consult the proxy docs. +## Authenticating reverse proxy / Zero-Trust gateway + +If you put an authenticating proxy in front of cix (Cloudflare Access, +oauth2-proxy, Authelia, an SSO/mTLS-terminating LB), the browser dashboard +passes it via interactive SSO, but the `cix` CLI and AI-agent tooling send only +the cix Bearer and get bounced at the edge (302/403) unless their source IP is +allow-listed. The CLI can attach **custom request headers** (e.g. a Cloudflare +Access service token) on every request to satisfy the edge layer in addition to +the cix Bearer — the proxy validates and strips them, so cix needs no knowledge +of the proxy. See +[`CLI_CONFIG.md` → Custom request headers](CLI_CONFIG.md#custom-request-headers-auth-behind-a-reverse-proxy). + ## Login brute-force resistance POST `/api/v1/auth/login` is rate-limited in process (`internal/httpapi/loginlimiter.go`): From 7427d71e4ea76c13d88451642b71c99262021550 Mon Sep 17 00:00:00 2001 From: dvcdsys Date: Fri, 5 Jun 2026 13:14:56 +0100 Subject: [PATCH 4/4] fix(cli): strict env expansion for custom headers (code review #1, #2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address two footguns in the ${ENV} expansion of custom request headers, both found in review of #59: 1. An unset (typo'd / unexported) variable used to expand to "" via os.ExpandEnv and be sent as an empty header — bouncing at the proxy with an opaque 403 and no hint. Now ExpandEnvHeaderValue treats a referenced-but-unset variable as a hard error that NAMES the variable (never the value). A set-but-empty var is still honored as intentional. 2. os.ExpandEnv mangled a literal `$` in a value (e.g. `pa$$word` → `pa`). The new expander supports `$$` as an escape for a literal `$`, so values containing `$` survive intact. Also documents header-name canonicalization (CF- → Cf-, harmless since HTTP header names are case-insensitive) in CLI_CONFIG.md. Tests: ExpandEnvHeaderValue table (escapes, set/empty/unset) and a getClient test proving an unset header env var fails loudly. Co-Authored-By: Claude Opus 4.8 --- cli/cmd/headers_test.go | 26 ++++++++++ cli/cmd/root.go | 11 +++-- cli/internal/config/config.go | 76 +++++++++++++++++++++++++++++ cli/internal/config/headers_test.go | 39 +++++++++++++++ doc/CLI_CONFIG.md | 12 +++++ 5 files changed, 161 insertions(+), 3 deletions(-) diff --git a/cli/cmd/headers_test.go b/cli/cmd/headers_test.go index 31798b2..1ba3bbb 100644 --- a/cli/cmd/headers_test.go +++ b/cli/cmd/headers_test.go @@ -62,6 +62,32 @@ func TestGetClient_ExpandsHeaderEnvVars(t *testing.T) { } } +// TestGetClient_UnsetHeaderEnvVarErrors ensures a header referencing an unset +// env var fails getClient loudly (naming the var) instead of silently sending +// an empty header that would bounce at the proxy — finding #1. +func TestGetClient_UnsetHeaderEnvVarErrors(t *testing.T) { + isolateConfig(t) + if err := config.SetServerURL(config.DefaultServerName, "http://localhost:21847"); err != nil { + t.Fatal(err) + } + if err := config.SetServerKey(config.DefaultServerName, "k"); err != nil { + t.Fatal(err) + } + if err := config.SetServerHeader(config.DefaultServerName, "CF-Access-Client-Secret", "${CIX_DEFINITELY_UNSET_VAR}"); err != nil { + t.Fatal(err) + } + // Deliberately do NOT set CIX_DEFINITELY_UNSET_VAR. + withFlags(t, "", "", "") + + _, err := getClient() + if err == nil { + t.Fatal("expected getClient to fail on an unset header env var") + } + if !strings.Contains(err.Error(), "CIX_DEFINITELY_UNSET_VAR") { + t.Errorf("error should name the missing variable, got %v", err) + } +} + // TestGetClient_InvalidHeaderErrors ensures a malformed header (after env // expansion) fails getClient loudly and never echoes the value. func TestGetClient_InvalidHeaderErrors(t *testing.T) { diff --git a/cli/cmd/root.go b/cli/cmd/root.go index c749e0e..1539e90 100644 --- a/cli/cmd/root.go +++ b/cli/cmd/root.go @@ -196,12 +196,17 @@ func getClient() (*client.Client, error) { // Custom headers: ${ENV}-expand each value into a local copy (never // written back to disk, like the url/key overrides above) so secrets can - // stay in the environment rather than in config.yaml. Validate after - // expansion; on a bad header fail loudly WITHOUT echoing the value. + // stay in the environment rather than in config.yaml. Expansion is strict — + // a referenced-but-unset variable is an error, not a silent empty header — + // and validates after expansion. All failures name the header/variable but + // never echo the resolved value. if len(srv.Headers) > 0 { expanded := make(map[string]string, len(srv.Headers)) for name, raw := range srv.Headers { - val := os.ExpandEnv(raw) + val, err := config.ExpandEnvHeaderValue(raw) + if err != nil { + return nil, fmt.Errorf("custom header %q for server %q: %w", name, srv.Name, err) + } if err := config.ValidateHeader(name, val); err != nil { return nil, fmt.Errorf("invalid custom header %q for server %q: %w", name, srv.Name, err) } diff --git a/cli/internal/config/config.go b/cli/internal/config/config.go index f0662aa..a1ba664 100644 --- a/cli/internal/config/config.go +++ b/cli/internal/config/config.go @@ -393,6 +393,82 @@ func validateHeader(name, value string) error { // getClient, after ${ENV} expansion). Same rules as the internal check. func ValidateHeader(name, value string) error { return validateHeader(name, value) } +// ExpandEnvHeaderValue expands $VAR / ${VAR} references in a header value using +// the environment, returning an error that NAMES the first referenced variable +// that is not set. This is stricter than os.ExpandEnv on purpose: +// +// - A reference to an UNSET variable is an error, not a silent empty string. +// The whole point of custom headers is to satisfy an authenticating proxy; +// a forgotten `export` or a typo'd var name must fail loudly here rather +// than send an empty `CF-Access-Client-Secret:` and bounce at the edge with +// an opaque 403. A variable that is SET but empty (`export X=`) is honored +// as an intentional empty value. +// - `$$` is an escape for a literal `$`, so a value that legitimately +// contains `$` (e.g. a token) can be written `pa$$word` without being +// mangled into `pa` + an env lookup. +// +// The error never contains the resolved value (only the variable name), so it +// is safe to surface to the user / logs. +func ExpandEnvHeaderValue(s string) (string, error) { + var b strings.Builder + for i := 0; i < len(s); { + if s[i] != '$' { + b.WriteByte(s[i]) + i++ + continue + } + // `$$` → literal `$`. + if i+1 < len(s) && s[i+1] == '$' { + b.WriteByte('$') + i += 2 + continue + } + name, width := envRefName(s[i+1:]) + if name == "" { + // Lone `$` or unparseable reference: keep the `$` literal. + b.WriteByte('$') + i++ + continue + } + val, ok := os.LookupEnv(name) + if !ok { + return "", fmt.Errorf("environment variable %q is not set", name) + } + b.WriteString(val) + i += 1 + width + } + return b.String(), nil +} + +// envRefName parses the variable name immediately following a `$`. It accepts +// `{VAR}` (braced) and bare `VAR` (a run of [A-Za-z0-9_]). It returns the name +// and how many bytes it consumed (excluding the leading `$`); ("", 0) means the +// `$` does not introduce a valid reference. +func envRefName(s string) (name string, width int) { + if len(s) == 0 { + return "", 0 + } + if s[0] == '{' { + end := strings.IndexByte(s, '}') + if end <= 1 { // no closing brace, or empty `${}` + return "", 0 + } + return s[1:end], end + 1 + } + var j int + for j < len(s) && isEnvNameByte(s[j]) { + j++ + } + return s[:j], j +} + +func isEnvNameByte(c byte) bool { + return c == '_' || + ('a' <= c && c <= 'z') || + ('A' <= c && c <= 'Z') || + ('0' <= c && c <= '9') +} + // SetDefaultServer marks an existing server as the default and persists. func SetDefaultServer(name string) error { cfg, err := Load() diff --git a/cli/internal/config/headers_test.go b/cli/internal/config/headers_test.go index a3fde83..0a27cab 100644 --- a/cli/internal/config/headers_test.go +++ b/cli/internal/config/headers_test.go @@ -113,6 +113,45 @@ func TestValidateHeader(t *testing.T) { } } +func TestExpandEnvHeaderValue(t *testing.T) { + t.Setenv("CIX_H_SET", "secret-val") + t.Setenv("CIX_H_EMPTY", "") // set-but-empty is honored + + ok := []struct{ in, want string }{ + {"plain", "plain"}, + {"${CIX_H_SET}", "secret-val"}, + {"$CIX_H_SET", "secret-val"}, + {"pre-${CIX_H_SET}-post", "pre-secret-val-post"}, + {"${CIX_H_EMPTY}", ""}, // set-but-empty → "" (no error) + {"pa$$word", "pa$word"}, // $$ escapes a literal $ + {"$$CIX_H_SET", "$CIX_H_SET"}, // escaped $ then literal text, no lookup + {"100$$", "100$"}, // trailing $$ + {"a$ b", "a$ b"}, // lone $ before non-name kept literal + } + for _, c := range ok { + got, err := ExpandEnvHeaderValue(c.in) + if err != nil { + t.Errorf("ExpandEnvHeaderValue(%q) unexpected error: %v", c.in, err) + continue + } + if got != c.want { + t.Errorf("ExpandEnvHeaderValue(%q) = %q, want %q", c.in, got, c.want) + } + } + + // Unset variable → loud error that names the var but not (there is no) value. + for _, in := range []string{"${CIX_H_UNSET}", "$CIX_H_UNSET", "x${CIX_H_UNSET}y"} { + _, err := ExpandEnvHeaderValue(in) + if err == nil { + t.Errorf("ExpandEnvHeaderValue(%q) expected error for unset var", in) + continue + } + if !strings.Contains(err.Error(), "CIX_H_UNSET") { + t.Errorf("error should name the missing variable, got %v", err) + } + } +} + func TestSetServerHeaderRejectsInvalid(t *testing.T) { withIsolatedHome(t) if err := SetServerHeader("corp", "Bad Name", "v"); err == nil { diff --git a/doc/CLI_CONFIG.md b/doc/CLI_CONFIG.md index ab4b580..a66692f 100644 --- a/doc/CLI_CONFIG.md +++ b/doc/CLI_CONFIG.md @@ -113,6 +113,18 @@ Notes: config file keeps the literal `${CIX_CF_ACCESS_SECRET}`, so no plaintext secret is persisted. Use `${VAR}` / `$VAR`; quote the value in your shell so it isn't expanded before cix sees it. +- **Unset variable = hard error.** If a header references a variable that is + not set (a typo or a forgotten `export`), the command fails with a message + naming the variable instead of silently sending an empty header that bounces + at the proxy. A variable that is *set but empty* (`export X=`) is honored as + an intentional empty value. +- **Literal `$`.** Expansion treats `$NAME` / `${NAME}` as references; to put a + literal `$` in a value, write `$$` (e.g. a token `pa$$word` is sent as + `pa$word`). +- **Header-name case.** Names are canonicalized on the wire (Go's + `http.Header`), so `CF-Access-Client-Id` is sent as `Cf-Access-Client-Id`. + HTTP header names are case-insensitive (RFC 7230) and Cloudflare Access et al. + honor that, so this is normally invisible. - **Never printed.** Header values are sensitive: `cix config show` and `cix config edit` (TUI) surface only the **count** (`headers=N` / `headers N`), never names or values. Edit them via `cix config set` or by hand in