diff --git a/cmd/ratify-gatekeeper-provider/main.go b/cmd/ratify-gatekeeper-provider/main.go index 1a4b895b0..1292eb3f1 100644 --- a/cmd/ratify-gatekeeper-provider/main.go +++ b/cmd/ratify-gatekeeper-provider/main.go @@ -18,14 +18,20 @@ package main import ( "errors" "flag" + "fmt" "time" + "github.com/notaryproject/ratify/v2/internal/healthprobe" "github.com/notaryproject/ratify/v2/internal/httpserver" "github.com/notaryproject/ratify/v2/internal/manager" "github.com/sirupsen/logrus" ) -var startManagerFunc = manager.StartManager +var managerReadySignal *manager.ReadySignal + +var startManagerFunc = func(certRotatorReady chan struct{}, disableMutation bool, disableCRDManager bool) { + manager.StartManager(certRotatorReady, managerReadySignal, disableMutation, disableCRDManager) +} // main is the entry point for the Ratify server. func main() { @@ -38,6 +44,7 @@ func main() { type options struct { configFilePath string httpServerAddress string + healthPort int certFile string keyFile string gatekeeperCACertFile string @@ -52,6 +59,7 @@ func parse() *options { opts := &options{} flag.StringVar(&opts.configFilePath, "config", "", "Path to the Ratify configuration file") flag.StringVar(&opts.httpServerAddress, "address", "", "HTTP server address") + flag.IntVar(&opts.healthPort, "health-port", 9090, "Dedicated health probe port") flag.StringVar(&opts.certFile, "cert-file", "", "Path to the TLS certificate file") flag.StringVar(&opts.keyFile, "key-file", "", "Path to the TLS key file") flag.StringVar(&opts.gatekeeperCACertFile, "gatekeeper-ca-cert-file", "", "Path to the Gatekeeper CA certificate file") @@ -67,13 +75,35 @@ func parse() *options { } func startRatify(opts *options) error { + if opts == nil { + return errors.New("options are required") + } if len(opts.httpServerAddress) == 0 { return errors.New("HTTP server address is required") } + if opts.healthPort <= 0 { + return errors.New("health port must be greater than zero") + } + var certRotatorReady chan struct{} if !opts.disableCertRotation { certRotatorReady = make(chan struct{}) } + + healthRegistry := healthprobe.NewRegistry() + managerReadySignal = manager.NewReadySignal() + defer func() { + managerReadySignal = nil + }() + if err := healthRegistry.RegisterReadiness(managerReadySignal.Checker()); err != nil { + return fmt.Errorf("failed to register manager readiness checker: %w", err) + } + + healthServer, err := healthprobe.NewServer(fmt.Sprintf(":%d", opts.healthPort), healthRegistry) + if err != nil { + return fmt.Errorf("failed to create health probe server: %w", err) + } + serverOpts := &httpserver.ServerOptions{ HTTPServerAddress: opts.httpServerAddress, CertFile: opts.certFile, @@ -84,8 +114,14 @@ func startRatify(opts *options) error { DisableMutation: opts.disableMutation, DisableCRDManager: opts.disableCRDManager, CertRotatorReady: certRotatorReady, + HealthRegistry: healthRegistry, } + go func() { + if err := healthServer.Start(); err != nil { + logrus.WithError(err).Fatal("failed to start health probe server") + } + }() go startManagerFunc(certRotatorReady, serverOpts.DisableMutation, serverOpts.DisableCRDManager) return httpserver.StartServer(serverOpts, opts.configFilePath) } diff --git a/cmd/ratify-gatekeeper-provider/main_test.go b/cmd/ratify-gatekeeper-provider/main_test.go index ccddba3ca..c91ad6941 100644 --- a/cmd/ratify-gatekeeper-provider/main_test.go +++ b/cmd/ratify-gatekeeper-provider/main_test.go @@ -63,6 +63,7 @@ func TestParse(t *testing.T) { expected: &options{ configFilePath: "config.json", httpServerAddress: ":8080", + healthPort: 9090, certFile: "cert.pem", keyFile: "key.pem", verifyTimeout: 10 * time.Second, @@ -76,6 +77,7 @@ func TestParse(t *testing.T) { "-mutate-timeout=10s", }, expected: &options{ + healthPort: 9090, verifyTimeout: 30 * time.Second, mutateTimeout: 10 * time.Second, }, @@ -84,6 +86,7 @@ func TestParse(t *testing.T) { name: "default values", args: []string{}, expected: &options{ + healthPort: 9090, verifyTimeout: 5 * time.Second, mutateTimeout: 2 * time.Second, }, @@ -129,6 +132,7 @@ func TestStartRatify(t *testing.T) { name: "failed to start the server", opts: &options{ httpServerAddress: ":8080", + healthPort: 9090, configFilePath: "config.yaml", certFile: "cert.pem", disableCertRotation: true, diff --git a/deployments/ratify-gatekeeper-provider/README.md b/deployments/ratify-gatekeeper-provider/README.md index 98306bbe8..3dc9ae957 100644 --- a/deployments/ratify-gatekeeper-provider/README.md +++ b/deployments/ratify-gatekeeper-provider/README.md @@ -40,6 +40,7 @@ Values marked `# DEPRECATED` in the `values.yaml` as well as **DEPRECATED** in t | `image.tag` | Image tag | `` | | `image.pullPolicy` | Image pull policy | `IfNotPresent` | | `replicaCount` | Number of replicas to run | `1` | +| `provider.healthPort` | Port exposed for the liveness and readiness health endpoints. | `9090` | | `notation.scopes` | Scopes that Notation verifier is applicable for. See [Notation trust policy](https://github.com/notaryproject/specifications/blob/main/specs/trust-store-trust-policy.md#trust-policy). | `[]` | | `notation.trustedIdentities` | List of trusted identities for Notation verifier. See [Notation trust policy](https://github.com/notaryproject/specifications/blob/main/specs/trust-store-trust-policy.md#trust-policy). | `[]` | | `notation.certs` | List of trusted root certificates for Notation verifier. | `[]` | diff --git a/deployments/ratify-gatekeeper-provider/templates/deployment.yaml b/deployments/ratify-gatekeeper-provider/templates/deployment.yaml index 40fa8cdfb..c5a0efe56 100644 --- a/deployments/ratify-gatekeeper-provider/templates/deployment.yaml +++ b/deployments/ratify-gatekeeper-provider/templates/deployment.yaml @@ -40,6 +40,8 @@ spec: args: - "--address" - ":6001" + - "--health-port" + - {{ .Values.provider.healthPort | quote }} - "--config" - "/usr/local/config.json" {{- if .Values.provider.timeout.validationTimeoutSeconds }} @@ -65,7 +67,22 @@ spec: - "--gatekeeper-ca-cert-file=/usr/local/tls/client-ca/ca.crt" {{- end }} ports: - - containerPort: 6001 + - name: https + containerPort: 6001 + - name: health + containerPort: {{ .Values.provider.healthPort }} + livenessProbe: + httpGet: + path: /healthz + port: {{ .Values.provider.healthPort | default 9090 }} + initialDelaySeconds: 5 + periodSeconds: 10 + readinessProbe: + httpGet: + path: /readyz + port: {{ .Values.provider.healthPort | default 9090 }} + initialDelaySeconds: 5 + periodSeconds: 10 volumeMounts: - mountPath: "/usr/local/tls" name: tls diff --git a/deployments/ratify-gatekeeper-provider/templates/service.yaml b/deployments/ratify-gatekeeper-provider/templates/service.yaml index b0709d18c..224e9058d 100644 --- a/deployments/ratify-gatekeeper-provider/templates/service.yaml +++ b/deployments/ratify-gatekeeper-provider/templates/service.yaml @@ -7,7 +7,8 @@ metadata: spec: type: ClusterIP ports: - - port: 6001 - targetPort: 6001 + - name: https + port: 6001 + targetPort: https selector: - {{- include "ratify.selectorLabels" . | nindent 4 }} \ No newline at end of file + {{- include "ratify.selectorLabels" . | nindent 4 }} diff --git a/deployments/ratify-gatekeeper-provider/values.yaml b/deployments/ratify-gatekeeper-provider/values.yaml index f9f704383..848e10e4d 100644 --- a/deployments/ratify-gatekeeper-provider/values.yaml +++ b/deployments/ratify-gatekeeper-provider/values.yaml @@ -59,6 +59,7 @@ provider: key: "" # key used by ratify (httpserver), please provide your own key caCert: "" # CA crt used by ratify (httpserver), please provide your own CA crt disableCertRotation: false + healthPort: 9090 disableMutation: false disableCRDManager: false timeout: diff --git a/internal/healthprobe/checker.go b/internal/healthprobe/checker.go new file mode 100644 index 000000000..6a364ef36 --- /dev/null +++ b/internal/healthprobe/checker.go @@ -0,0 +1,143 @@ +/* +Copyright The Ratify Authors. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package healthprobe + +import ( + "errors" + "fmt" + "sync" +) + +// HealthChecker reports component health to the dedicated health probe server. +type HealthChecker interface { + Name() string + Check() error +} + +// CheckerFunc adapts a function into a named health checker. +type CheckerFunc struct { + name string + fn func() error +} + +// NewChecker creates a named health checker. +func NewChecker(name string, fn func() error) (*CheckerFunc, error) { + if name == "" { + return nil, errors.New("checker name is required") + } + if fn == nil { + return nil, errors.New("checker function is required") + } + return &CheckerFunc{name: name, fn: fn}, nil +} + +// MustNewChecker creates a checker and panics if the checker is invalid. +func MustNewChecker(name string, fn func() error) *CheckerFunc { + checker, err := NewChecker(name, fn) + if err != nil { + panic(err) + } + return checker +} + +// Name returns the checker name. +func (c *CheckerFunc) Name() string { + if c == nil { + return "" + } + return c.name +} + +// Check runs the checker function. +func (c *CheckerFunc) Check() error { + if c == nil { + return errors.New("checker is nil") + } + if c.fn == nil { + return errors.New("checker function is nil") + } + return c.fn() +} + +// Registry stores liveness and readiness checks for the health probe server. +type Registry struct { + mu sync.RWMutex + liveness []HealthChecker + readiness []HealthChecker +} + +// NewRegistry creates an empty checker registry. +func NewRegistry() *Registry { + return &Registry{} +} + +// RegisterLiveness adds a liveness checker. +func (r *Registry) RegisterLiveness(checker HealthChecker) error { + return r.register(&r.liveness, checker) +} + +// RegisterReadiness adds a readiness checker. +func (r *Registry) RegisterReadiness(checker HealthChecker) error { + return r.register(&r.readiness, checker) +} + +func (r *Registry) register(target *[]HealthChecker, checker HealthChecker) error { + if r == nil { + return errors.New("registry is nil") + } + if checker == nil { + return errors.New("checker is nil") + } + if checker.Name() == "" { + return errors.New("checker name is required") + } + + r.mu.Lock() + defer r.mu.Unlock() + + for _, existing := range *target { + if existing.Name() == checker.Name() { + return fmt.Errorf("checker %q is already registered", checker.Name()) + } + } + + *target = append(*target, checker) + return nil +} + +// LivenessCheckers returns a snapshot of the registered liveness checks. +func (r *Registry) LivenessCheckers() []HealthChecker { + if r == nil { + return nil + } + + r.mu.RLock() + defer r.mu.RUnlock() + + return append([]HealthChecker(nil), r.liveness...) +} + +// ReadinessCheckers returns a snapshot of the registered readiness checks. +func (r *Registry) ReadinessCheckers() []HealthChecker { + if r == nil { + return nil + } + + r.mu.RLock() + defer r.mu.RUnlock() + + return append([]HealthChecker(nil), r.readiness...) +} diff --git a/internal/healthprobe/checker_test.go b/internal/healthprobe/checker_test.go new file mode 100644 index 000000000..1dee1a919 --- /dev/null +++ b/internal/healthprobe/checker_test.go @@ -0,0 +1,236 @@ +/* +Copyright The Ratify Authors. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package healthprobe + +import ( + "errors" + "sync" + "testing" +) + +func TestNewChecker(t *testing.T) { + tests := []struct { + name string + cName string + fn func() error + wantErr string + }{ + { + name: "valid checker", + cName: "test", + fn: func() error { return nil }, + }, + { + name: "empty name", + cName: "", + fn: func() error { return nil }, + wantErr: "checker name is required", + }, + { + name: "nil function", + cName: "test", + fn: nil, + wantErr: "checker function is required", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + c, err := NewChecker(tt.cName, tt.fn) + if tt.wantErr != "" { + if err == nil || err.Error() != tt.wantErr { + t.Fatalf("expected error %q, got %v", tt.wantErr, err) + } + if c != nil { + t.Fatal("expected nil checker on error") + } + return + } + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if c.Name() != tt.cName { + t.Fatalf("expected name %q, got %q", tt.cName, c.Name()) + } + }) + } +} + +func TestMustNewChecker_Panics(t *testing.T) { + defer func() { + if r := recover(); r == nil { + t.Fatal("expected panic") + } + }() + MustNewChecker("", nil) +} + +func TestMustNewChecker_Valid(t *testing.T) { + c := MustNewChecker("ok", func() error { return nil }) + if c.Name() != "ok" { + t.Fatalf("expected name %q, got %q", "ok", c.Name()) + } +} + +func TestCheckerFunc_NilReceiver(t *testing.T) { + var c *CheckerFunc + if c.Name() != "" { + t.Fatal("nil receiver Name() should return empty string") + } + if err := c.Check(); err == nil || err.Error() != "checker is nil" { + t.Fatalf("nil receiver Check() should return 'checker is nil', got %v", err) + } +} + +func TestCheckerFunc_NilFn(t *testing.T) { + c := &CheckerFunc{name: "broken"} + if err := c.Check(); err == nil || err.Error() != "checker function is nil" { + t.Fatalf("expected 'checker function is nil', got %v", err) + } +} + +func TestCheckerFunc_ReturnsError(t *testing.T) { + expected := errors.New("something failed") + c := MustNewChecker("failing", func() error { return expected }) + if err := c.Check(); !errors.Is(err, expected) { + t.Fatalf("expected %v, got %v", expected, err) + } +} + +func TestCheckerFunc_ReturnsNil(t *testing.T) { + c := MustNewChecker("healthy", func() error { return nil }) + if err := c.Check(); err != nil { + t.Fatalf("expected nil error, got %v", err) + } +} + +func TestRegistry_RegisterLiveness(t *testing.T) { + r := NewRegistry() + c := MustNewChecker("live1", func() error { return nil }) + + if err := r.RegisterLiveness(c); err != nil { + t.Fatalf("unexpected error: %v", err) + } + + checkers := r.LivenessCheckers() + if len(checkers) != 1 || checkers[0].Name() != "live1" { + t.Fatal("expected one liveness checker named 'live1'") + } +} + +func TestRegistry_RegisterReadiness(t *testing.T) { + r := NewRegistry() + c := MustNewChecker("ready1", func() error { return nil }) + + if err := r.RegisterReadiness(c); err != nil { + t.Fatalf("unexpected error: %v", err) + } + + checkers := r.ReadinessCheckers() + if len(checkers) != 1 || checkers[0].Name() != "ready1" { + t.Fatal("expected one readiness checker named 'ready1'") + } +} + +func TestRegistry_DuplicateDetection(t *testing.T) { + r := NewRegistry() + target := []HealthChecker{MustNewChecker("dup", func() error { return nil })} + + err := r.register(&target, MustNewChecker("dup", func() error { return nil })) + if err == nil { + t.Fatal("expected error for duplicate registration") + } + if err.Error() != `checker "dup" is already registered` { + t.Fatalf("unexpected error message: %v", err) + } +} + +func TestRegistry_Register_EmptyName(t *testing.T) { + r := NewRegistry() + err := r.register(&[]HealthChecker{}, &CheckerFunc{fn: func() error { return nil }}) + if err == nil || err.Error() != "checker name is required" { + t.Fatalf("expected empty name error, got %v", err) + } +} + +func TestRegistry_Register_NilRegistry(t *testing.T) { + checker := MustNewChecker("live", func() error { return nil }) + target := []HealthChecker{} + var r *Registry + if err := r.register(&target, checker); err == nil || err.Error() != "registry is nil" { + t.Fatalf("expected nil registry error, got %v", err) + } +} + +func TestRegistry_NilChecker(t *testing.T) { + r := NewRegistry() + err := r.RegisterLiveness(nil) + if err == nil || err.Error() != "checker is nil" { + t.Fatalf("expected 'checker is nil', got %v", err) + } +} + +func TestRegistry_SnapshotIsolation(t *testing.T) { + r := NewRegistry() + c1 := MustNewChecker("first", func() error { return nil }) + _ = r.RegisterLiveness(c1) + + snapshot := r.LivenessCheckers() + + c2 := MustNewChecker("second", func() error { return nil }) + _ = r.RegisterLiveness(c2) + + // Snapshot should not contain the checker added after it was taken. + if len(snapshot) != 1 { + t.Fatalf("snapshot should have 1 checker, got %d", len(snapshot)) + } + + // New snapshot should have both. + current := r.LivenessCheckers() + if len(current) != 2 { + t.Fatalf("current should have 2 checkers, got %d", len(current)) + } +} + +func TestRegistry_NilRegistry(t *testing.T) { + var r *Registry + if checkers := r.LivenessCheckers(); checkers != nil { + t.Fatal("nil registry LivenessCheckers() should return nil") + } + if checkers := r.ReadinessCheckers(); checkers != nil { + t.Fatal("nil registry ReadinessCheckers() should return nil") + } +} + +func TestRegistry_ConcurrentAccess(_ *testing.T) { + r := NewRegistry() + var wg sync.WaitGroup + + for i := 0; i < 50; i++ { + wg.Add(1) + go func(idx int) { + defer wg.Done() + c := MustNewChecker( + // Use unique names to avoid duplicate error + errors.New("checker").Error()+string(rune('A'+idx)), + func() error { return nil }, + ) + _ = r.RegisterLiveness(c) + _ = r.LivenessCheckers() + }(i) + } + wg.Wait() +} diff --git a/internal/healthprobe/server.go b/internal/healthprobe/server.go new file mode 100644 index 000000000..eebf3243c --- /dev/null +++ b/internal/healthprobe/server.go @@ -0,0 +1,217 @@ +/* +Copyright The Ratify Authors. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package healthprobe + +import ( + "context" + "encoding/json" + "fmt" + "net/http" + "os" + "os/signal" + "sync/atomic" + "syscall" + "time" +) + +const ( + readHeaderTimeout = 5 * time.Second + shutdownTimeout = 5 * time.Second + + statusNotAlive = "not alive" + statusNotReady = "not ready" + statusError = "error" + msgCheckerNil = "checker is nil" +) + +// CheckResult captures the outcome of a single health check. +type CheckResult struct { + Name string `json:"name"` + Status string `json:"status"` + Error string `json:"error,omitempty"` +} + +type response struct { + Status string `json:"status"` + Checks []CheckResult `json:"checks,omitempty"` +} + +// Server serves plain HTTP /healthz and /readyz endpoints on a dedicated port. +type Server struct { + address string + registry *Registry + mux *http.ServeMux + started atomic.Bool +} + +// NewServer creates a dedicated health probe server. +func NewServer(address string, registry *Registry) (*Server, error) { + if address == "" { + return nil, fmt.Errorf("health probe address is required") + } + if registry == nil { + registry = NewRegistry() + } + + s := &Server{ + address: address, + registry: registry, + mux: http.NewServeMux(), + } + s.registerHandlers() + return s, nil +} + +func (s *Server) registerHandlers() { + if s == nil || s.mux == nil { + return + } + + s.mux.HandleFunc("/healthz", s.handleHealthz) + s.mux.HandleFunc("/readyz", s.handleReadyz) +} + +// Start runs the health probe server until SIGINT or SIGTERM is received. +func (s *Server) Start() error { + ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM) + defer stop() + return s.Run(ctx) +} + +// Run runs the health probe server until the context is canceled. +func (s *Server) Run(ctx context.Context) error { + if s == nil { + return fmt.Errorf("health probe server is nil") + } + if ctx == nil { + return fmt.Errorf("health probe context is nil") + } + if s.mux == nil { + return fmt.Errorf("health probe mux is nil") + } + if s.registry == nil { + return fmt.Errorf("health probe registry is nil") + } + + srv := &http.Server{ + Addr: s.address, + Handler: s.mux, + ReadHeaderTimeout: readHeaderTimeout, + } + + s.started.Store(true) + + errCh := make(chan error, 1) + go func() { + if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed { + errCh <- fmt.Errorf("failed to start health probe server: %w", err) + return + } + errCh <- nil + }() + + select { + case <-ctx.Done(): + shutdownCtx, cancel := context.WithTimeout(context.Background(), shutdownTimeout) + defer cancel() + if err := srv.Shutdown(shutdownCtx); err != nil { + return fmt.Errorf("failed to shutdown health probe server: %w", err) + } + return nil + case err := <-errCh: + return err + } +} + +func (s *Server) handleHealthz(w http.ResponseWriter, _ *http.Request) { + if s == nil || !s.started.Load() || s.registry == nil { + writeJSON(w, http.StatusServiceUnavailable, response{Status: statusNotAlive}) + return + } + + results, healthy := evaluate(s.registry.LivenessCheckers()) + if !healthy { + writeJSON(w, http.StatusServiceUnavailable, response{Status: statusNotAlive, Checks: results}) + return + } + + writeJSON(w, http.StatusOK, response{Status: "ok", Checks: results}) +} + +func (s *Server) handleReadyz(w http.ResponseWriter, _ *http.Request) { + if s == nil || !s.started.Load() || s.registry == nil { + writeJSON(w, http.StatusServiceUnavailable, response{Status: statusNotReady}) + return + } + + checkers := s.registry.ReadinessCheckers() + if len(checkers) == 0 { + writeJSON(w, http.StatusServiceUnavailable, response{Status: statusNotReady, Checks: []CheckResult{{ + Name: "registry", + Status: statusError, + Error: "no readiness checks registered", + }}}) + return + } + + results, healthy := evaluate(checkers) + if !healthy { + writeJSON(w, http.StatusServiceUnavailable, response{Status: statusNotReady, Checks: results}) + return + } + + writeJSON(w, http.StatusOK, response{Status: "ok", Checks: results}) +} + +func evaluate(checkers []HealthChecker) ([]CheckResult, bool) { + results := make([]CheckResult, 0, len(checkers)) + healthy := true + + for _, checker := range checkers { + if checker == nil { + healthy = false + results = append(results, CheckResult{ + Name: "unknown", + Status: statusError, + Error: msgCheckerNil, + }) + continue + } + + if err := checker.Check(); err != nil { + healthy = false + results = append(results, CheckResult{ + Name: checker.Name(), + Status: statusError, + Error: err.Error(), + }) + continue + } + + results = append(results, CheckResult{ + Name: checker.Name(), + Status: "ok", + }) + } + + return results, healthy +} + +func writeJSON(w http.ResponseWriter, statusCode int, payload response) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(statusCode) + _ = json.NewEncoder(w).Encode(payload) +} diff --git a/internal/healthprobe/server_test.go b/internal/healthprobe/server_test.go new file mode 100644 index 000000000..f6c5f4526 --- /dev/null +++ b/internal/healthprobe/server_test.go @@ -0,0 +1,541 @@ +/* +Copyright The Ratify Authors. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package healthprobe + +import ( + "context" + "encoding/json" + "errors" + "net" + "net/http" + "net/http/httptest" + "os" + "strings" + "syscall" + "testing" + "time" +) + +func newTestServer(t *testing.T, registry *Registry) *Server { + t.Helper() + s, err := NewServer(":9090", registry) + if err != nil { + t.Fatalf("unexpected error creating server: %v", err) + } + s.started.Store(true) + return s +} + +func TestNewServer_EmptyAddress(t *testing.T) { + _, err := NewServer("", nil) + if err == nil || err.Error() != "health probe address is required" { + t.Fatalf("expected address error, got %v", err) + } +} + +func TestNewServer_NilRegistry(t *testing.T) { + s, err := NewServer(":9090", nil) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if s.registry == nil { + t.Fatal("expected non-nil registry when nil is passed") + } +} + +func TestHealthz_AllPass(t *testing.T) { + reg := NewRegistry() + _ = reg.RegisterLiveness(MustNewChecker("check1", func() error { return nil })) + _ = reg.RegisterLiveness(MustNewChecker("check2", func() error { return nil })) + + s := newTestServer(t, reg) + rec := httptest.NewRecorder() + s.handleHealthz(rec, httptest.NewRequest(http.MethodGet, "/healthz", nil)) + + if rec.Code != http.StatusOK { + t.Fatalf("expected 200, got %d", rec.Code) + } + + var resp response + if err := json.NewDecoder(rec.Body).Decode(&resp); err != nil { + t.Fatalf("failed to decode response: %v", err) + } + if resp.Status != "ok" { + t.Fatalf("expected status 'ok', got %q", resp.Status) + } + if len(resp.Checks) != 2 { + t.Fatalf("expected 2 checks, got %d", len(resp.Checks)) + } + for _, c := range resp.Checks { + if c.Status != "ok" { + t.Fatalf("expected check status 'ok', got %q", c.Status) + } + } +} + +func TestHealthz_OneFails(t *testing.T) { + reg := NewRegistry() + _ = reg.RegisterLiveness(MustNewChecker("ok-check", func() error { return nil })) + _ = reg.RegisterLiveness(MustNewChecker("bad-check", func() error { return errors.New("disk full") })) + + s := newTestServer(t, reg) + rec := httptest.NewRecorder() + s.handleHealthz(rec, httptest.NewRequest(http.MethodGet, "/healthz", nil)) + + if rec.Code != http.StatusServiceUnavailable { + t.Fatalf("expected 503, got %d", rec.Code) + } + + var resp response + _ = json.NewDecoder(rec.Body).Decode(&resp) + if resp.Status != "not alive" { + t.Fatalf("expected status 'not alive', got %q", resp.Status) + } + + found := false + for _, c := range resp.Checks { + if c.Name == "bad-check" { + found = true + if c.Status != "error" { + t.Fatalf("expected check status 'error', got %q", c.Status) + } + if c.Error != "disk full" { + t.Fatalf("expected error 'disk full', got %q", c.Error) + } + } + } + if !found { + t.Fatal("expected 'bad-check' in response") + } +} + +func TestHealthz_NoCheckers(t *testing.T) { + reg := NewRegistry() + s := newTestServer(t, reg) + rec := httptest.NewRecorder() + s.handleHealthz(rec, httptest.NewRequest(http.MethodGet, "/healthz", nil)) + + if rec.Code != http.StatusOK { + t.Fatalf("expected 200 with no liveness checks, got %d", rec.Code) + } +} + +func TestReadyz_AllPass(t *testing.T) { + reg := NewRegistry() + _ = reg.RegisterReadiness(MustNewChecker("ready1", func() error { return nil })) + + s := newTestServer(t, reg) + rec := httptest.NewRecorder() + s.handleReadyz(rec, httptest.NewRequest(http.MethodGet, "/readyz", nil)) + + if rec.Code != http.StatusOK { + t.Fatalf("expected 200, got %d", rec.Code) + } + + var resp response + _ = json.NewDecoder(rec.Body).Decode(&resp) + if resp.Status != "ok" { + t.Fatalf("expected status 'ok', got %q", resp.Status) + } +} + +func TestReadyz_NoCheckers(t *testing.T) { + reg := NewRegistry() + s := newTestServer(t, reg) + rec := httptest.NewRecorder() + s.handleReadyz(rec, httptest.NewRequest(http.MethodGet, "/readyz", nil)) + + if rec.Code != http.StatusServiceUnavailable { + t.Fatalf("expected 503 when no readiness checks registered, got %d", rec.Code) + } +} + +func TestReadyz_OneFails(t *testing.T) { + reg := NewRegistry() + _ = reg.RegisterReadiness(MustNewChecker("db", func() error { return errors.New("connection refused") })) + + s := newTestServer(t, reg) + rec := httptest.NewRecorder() + s.handleReadyz(rec, httptest.NewRequest(http.MethodGet, "/readyz", nil)) + + if rec.Code != http.StatusServiceUnavailable { + t.Fatalf("expected 503, got %d", rec.Code) + } + + var resp response + _ = json.NewDecoder(rec.Body).Decode(&resp) + if resp.Status != "not ready" { + t.Fatalf("expected status 'not ready', got %q", resp.Status) + } + if len(resp.Checks) == 0 { + t.Fatal("expected checks in response") + } + if resp.Checks[0].Error != "connection refused" { + t.Fatalf("expected error 'connection refused', got %q", resp.Checks[0].Error) + } +} + +func TestServer_ResponseContentType(t *testing.T) { + reg := NewRegistry() + s := newTestServer(t, reg) + rec := httptest.NewRecorder() + s.handleHealthz(rec, httptest.NewRequest(http.MethodGet, "/healthz", nil)) + + ct := rec.Header().Get("Content-Type") + if ct != "application/json" { + t.Fatalf("expected Content-Type 'application/json', got %q", ct) + } +} + +func TestServer_Run_CancelContext(t *testing.T) { + reg := NewRegistry() + s, err := NewServer("127.0.0.1:0", reg) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + ctx, cancel := context.WithCancel(context.Background()) + errCh := make(chan error, 1) + go func() { + errCh <- s.Run(ctx) + }() + + time.Sleep(50 * time.Millisecond) + cancel() + + select { + case err := <-errCh: + if err != nil { + t.Fatalf("unexpected error from Run: %v", err) + } + case <-time.After(5 * time.Second): + t.Fatal("server did not shut down in time") + } +} + +func TestServer_Start_ShutsDownOnInterrupt(t *testing.T) { + reg := NewRegistry() + s, err := NewServer("127.0.0.1:0", reg) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + errCh := make(chan error, 1) + go func() { + errCh <- s.Start() + }() + + deadline := time.Now().Add(5 * time.Second) + for !s.started.Load() { + if time.Now().After(deadline) { + t.Fatal("server did not start in time") + } + time.Sleep(10 * time.Millisecond) + } + + if err := syscall.Kill(os.Getpid(), syscall.SIGINT); err != nil { + t.Fatalf("failed to send interrupt: %v", err) + } + + select { + case err := <-errCh: + if err != nil { + t.Fatalf("expected Start to return nil, got %v", err) + } + case <-time.After(5 * time.Second): + t.Fatal("Start did not return in time") + } +} + +func TestServer_Run_ListenAndServeError(t *testing.T) { + listener, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Fatalf("failed to reserve port: %v", err) + } + defer listener.Close() + + s, err := NewServer(listener.Addr().String(), NewRegistry()) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + err = s.Run(context.Background()) + if err == nil { + t.Fatal("expected ListenAndServe error") + } + if got, want := err.Error(), "failed to start health probe server"; len(got) < len(want) || got[:len(want)] != want { + t.Fatalf("expected error prefix %q, got %q", want, got) + } +} + +func TestServer_Run_ShutdownError(t *testing.T) { + listener, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Fatalf("failed to reserve port: %v", err) + } + address := listener.Addr().String() + if err := listener.Close(); err != nil { + t.Fatalf("failed to release port: %v", err) + } + + requestStarted := make(chan struct{}) + releaseHandler := make(chan struct{}) + requestErrCh := make(chan error, 1) + + s := &Server{address: address, registry: NewRegistry(), mux: http.NewServeMux()} + s.mux.HandleFunc("/block", func(w http.ResponseWriter, _ *http.Request) { + select { + case <-requestStarted: + default: + close(requestStarted) + } + <-releaseHandler + w.WriteHeader(http.StatusOK) + }) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + errCh := make(chan error, 1) + go func() { + errCh <- s.Run(ctx) + }() + + deadline := time.Now().Add(5 * time.Second) + for { + conn, err := net.DialTimeout("tcp", address, 50*time.Millisecond) + if err == nil { + _ = conn.Close() + break + } + if time.Now().After(deadline) { + t.Fatal("server did not start listening in time") + } + time.Sleep(10 * time.Millisecond) + } + + go func() { + resp, err := http.Get("http://" + address + "/block") + if resp != nil { + _ = resp.Body.Close() + } + requestErrCh <- err + }() + + select { + case <-requestStarted: + case <-time.After(5 * time.Second): + t.Fatal("blocking request did not reach handler") + } + + cancel() + + select { + case err := <-errCh: + if err == nil || !strings.Contains(err.Error(), "failed to shutdown health probe server") { + t.Fatalf("expected shutdown error, got %v", err) + } + case <-time.After(7 * time.Second): + t.Fatal("Run did not return shutdown error in time") + } + + close(releaseHandler) + + select { + case err := <-requestErrCh: + if err != nil { + t.Fatalf("blocking request failed: %v", err) + } + case <-time.After(2 * time.Second): + t.Fatal("blocking request did not complete in time") + } +} + +func TestServer_Run_InvalidState(t *testing.T) { + tests := []struct { + name string + server *Server + ctx context.Context + wantErr string + }{ + { + name: "nil server", + server: nil, + ctx: context.Background(), + wantErr: "health probe server is nil", + }, + { + name: "nil context", + server: &Server{address: ":0", mux: http.NewServeMux(), registry: NewRegistry()}, + ctx: nil, + wantErr: "health probe context is nil", + }, + { + name: "nil mux", + server: &Server{address: ":0", registry: NewRegistry()}, + ctx: context.Background(), + wantErr: "health probe mux is nil", + }, + { + name: "nil registry", + server: &Server{address: ":0", mux: http.NewServeMux()}, + ctx: context.Background(), + wantErr: "health probe registry is nil", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := tt.server.Run(tt.ctx) + if err == nil || err.Error() != tt.wantErr { + t.Fatalf("expected error %q, got %v", tt.wantErr, err) + } + }) + } +} + +func TestEvaluate_NilChecker(t *testing.T) { + results, healthy := evaluate([]HealthChecker{nil}) + if healthy { + t.Fatal("expected evaluate to report unhealthy for nil checker") + } + if len(results) != 1 { + t.Fatalf("expected 1 result, got %d", len(results)) + } + if results[0].Name != "unknown" { + t.Fatalf("expected checker name %q, got %q", "unknown", results[0].Name) + } + if results[0].Status != statusError { + t.Fatalf("expected checker status %q, got %q", statusError, results[0].Status) + } + if results[0].Error != msgCheckerNil { + t.Fatalf("expected checker error %q, got %q", msgCheckerNil, results[0].Error) + } +} + +func TestServer_NotStarted(t *testing.T) { + reg := NewRegistry() + s, _ := NewServer(":9090", reg) + // Don't set started to true + rec := httptest.NewRecorder() + s.handleHealthz(rec, httptest.NewRequest(http.MethodGet, "/healthz", nil)) + + if rec.Code != http.StatusServiceUnavailable { + t.Fatalf("expected 503 when server not started, got %d", rec.Code) + } +} + +func TestReadyz_NotStarted(t *testing.T) { + tests := []struct { + name string + server *Server + }{ + { + name: "nil server", + server: nil, + }, + { + name: "server not started", + server: &Server{registry: NewRegistry()}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + rec := httptest.NewRecorder() + tt.server.handleReadyz(rec, httptest.NewRequest(http.MethodGet, "/readyz", nil)) + + if rec.Code != http.StatusServiceUnavailable { + t.Fatalf("expected 503 when ready server is unavailable, got %d", rec.Code) + } + + var resp response + if err := json.NewDecoder(rec.Body).Decode(&resp); err != nil { + t.Fatalf("failed to decode response: %v", err) + } + if resp.Status != statusNotReady { + t.Fatalf("expected status %q, got %q", statusNotReady, resp.Status) + } + }) + } +} + +func TestServer_RegisterHandlers(t *testing.T) { + s, err := NewServer(":9090", NewRegistry()) + if err != nil { + t.Fatalf("unexpected error creating server: %v", err) + } + + tests := []struct { + path string + wantStatus string + }{ + {path: "/healthz", wantStatus: statusNotAlive}, + {path: "/readyz", wantStatus: statusNotReady}, + } + + for _, tt := range tests { + t.Run(tt.path, func(t *testing.T) { + rec := httptest.NewRecorder() + s.mux.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, tt.path, nil)) + + if rec.Code != http.StatusServiceUnavailable { + t.Fatalf("expected %s to be registered and return 503, got %d", tt.path, rec.Code) + } + + var resp response + if err := json.NewDecoder(rec.Body).Decode(&resp); err != nil { + t.Fatalf("failed to decode response: %v", err) + } + if resp.Status != tt.wantStatus { + t.Fatalf("expected status %q, got %q", tt.wantStatus, resp.Status) + } + }) + } +} + +func TestServer_RegisterHandlers_NilGuards(t *testing.T) { + tests := []struct { + name string + run func() + }{ + { + name: "nil server", + run: func() { + var s *Server + s.registerHandlers() + }, + }, + { + name: "nil mux", + run: func() { + s := &Server{registry: NewRegistry()} + s.registerHandlers() + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + defer func() { + if r := recover(); r != nil { + t.Fatalf("registerHandlers panicked: %v", r) + } + }() + tt.run() + }) + } +} diff --git a/internal/httpserver/health.go b/internal/httpserver/health.go new file mode 100644 index 000000000..45ac1bd67 --- /dev/null +++ b/internal/httpserver/health.go @@ -0,0 +1,143 @@ +/* +Copyright The Ratify Authors. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package httpserver + +import ( + "encoding/json" + "fmt" + "net/http" + "sync/atomic" + + "github.com/notaryproject/ratify/v2/internal/executor" + "github.com/notaryproject/ratify/v2/internal/healthprobe" +) + +const ( + httpServerAliveCheckerName = "http-server-alive" + httpServerExecutorCheckerName = "http-server-executor" +) + +// HealthStatus tracks whether the main Ratify HTTP server is serving traffic. +type HealthStatus struct { + alive atomic.Bool + ready atomic.Bool +} + +// healthStatus preserves compatibility with existing package-local tests. +type healthStatus = HealthStatus + +type healthResponse struct { + Status string `json:"status"` +} + +// NewHealthStatus creates a server health tracker. +func NewHealthStatus() *HealthStatus { + return &HealthStatus{} +} + +// MarkAlive records that the main Ratify HTTP server is ready to accept traffic. +func (h *HealthStatus) MarkAlive() { + if h == nil { + return + } + h.alive.Store(true) +} + +// MarkReady records that the main Ratify HTTP server has a usable executor. +func (h *HealthStatus) MarkReady() { + if h == nil { + return + } + h.ready.Store(true) +} + +// IsAlive reports whether the main Ratify HTTP server is serving traffic. +func (h *HealthStatus) IsAlive() bool { + if h == nil { + return false + } + return h.alive.Load() +} + +// IsReady reports whether the main Ratify HTTP server has a usable executor. +func (h *HealthStatus) IsReady() bool { + if h == nil { + return false + } + return h.ready.Load() +} + +// AliveChecker returns a liveness check for the main Ratify HTTP server. +func (h *HealthStatus) AliveChecker() healthprobe.HealthChecker { + return healthprobe.MustNewChecker(httpServerAliveCheckerName, func() error { + if h == nil { + return fmt.Errorf("http server health status is nil") + } + if !h.IsAlive() { + return fmt.Errorf("http server is not serving") + } + return nil + }) +} + +// ExecutorChecker returns a readiness check for the executor backing the HTTP server. +func (h *HealthStatus) ExecutorChecker(getExecutor func() *executor.ScopedExecutor) healthprobe.HealthChecker { + return healthprobe.MustNewChecker(httpServerExecutorCheckerName, func() error { + if h == nil { + return fmt.Errorf("http server health status is nil") + } + if !h.IsAlive() { + return fmt.Errorf("http server is not serving") + } + if getExecutor == nil { + return fmt.Errorf("executor getter is nil") + } + if getExecutor() == nil { + return fmt.Errorf("executor is not loaded") + } + return nil + }) +} + +func (s *server) healthzHandler() http.HandlerFunc { + return func(w http.ResponseWriter, _ *http.Request) { + if s == nil || s.health == nil || !s.health.IsAlive() { + writeHealthResponse(w, http.StatusServiceUnavailable, healthResponse{Status: "not alive"}) + return + } + writeHealthResponse(w, http.StatusOK, healthResponse{Status: "ok"}) + } +} + +func (s *server) readyzHandler() http.HandlerFunc { + return func(w http.ResponseWriter, _ *http.Request) { + if s == nil || s.health == nil || !s.health.IsAlive() || !s.health.IsReady() { + writeHealthResponse(w, http.StatusServiceUnavailable, healthResponse{Status: "not ready"}) + return + } + if s.getExecutor == nil || s.getExecutor() == nil { + writeHealthResponse(w, http.StatusServiceUnavailable, healthResponse{Status: "no executor configured"}) + return + } + writeHealthResponse(w, http.StatusOK, healthResponse{Status: "ok"}) + } +} + +func writeHealthResponse(w http.ResponseWriter, statusCode int, payload healthResponse) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(statusCode) + _ = json.NewEncoder(w).Encode(payload) +} diff --git a/internal/httpserver/health_test.go b/internal/httpserver/health_test.go new file mode 100644 index 000000000..bd78141d3 --- /dev/null +++ b/internal/httpserver/health_test.go @@ -0,0 +1,267 @@ +/* +Copyright The Ratify Authors. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package httpserver + +import ( + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + + "github.com/notaryproject/ratify/v2/internal/executor" +) + +func TestHealthzHandler_Alive(t *testing.T) { + s := &server{ + health: &healthStatus{}, + } + s.health.alive.Store(true) + + req := httptest.NewRequest(http.MethodGet, "/healthz", nil) + w := httptest.NewRecorder() + s.healthzHandler().ServeHTTP(w, req) + + if w.Code != http.StatusOK { + t.Errorf("expected status 200, got %d", w.Code) + } + var resp healthResponse + if err := json.NewDecoder(w.Body).Decode(&resp); err != nil { + t.Fatalf("failed to decode response: %v", err) + } + if resp.Status != "ok" { + t.Errorf("expected status 'ok', got %q", resp.Status) + } +} + +func TestHealthzHandler_NotAlive(t *testing.T) { + s := &server{ + health: &healthStatus{}, + } + // alive defaults to false + + req := httptest.NewRequest(http.MethodGet, "/healthz", nil) + w := httptest.NewRecorder() + s.healthzHandler().ServeHTTP(w, req) + + if w.Code != http.StatusServiceUnavailable { + t.Errorf("expected status 503, got %d", w.Code) + } + var resp healthResponse + if err := json.NewDecoder(w.Body).Decode(&resp); err != nil { + t.Fatalf("failed to decode response: %v", err) + } + if resp.Status != "not alive" { + t.Errorf("expected status 'not alive', got %q", resp.Status) + } +} + +func TestReadyzHandler_NotReady(t *testing.T) { + s := &server{ + health: &healthStatus{}, + } + s.health.alive.Store(true) + // ready defaults to false + + req := httptest.NewRequest(http.MethodGet, "/readyz", nil) + w := httptest.NewRecorder() + s.readyzHandler().ServeHTTP(w, req) + + if w.Code != http.StatusServiceUnavailable { + t.Errorf("expected status 503, got %d", w.Code) + } +} + +func TestReadyzHandler_NoExecutor(t *testing.T) { + s := &server{ + health: &healthStatus{}, + getExecutor: func() *executor.ScopedExecutor { return nil }, + } + s.health.alive.Store(true) + s.health.ready.Store(true) + + req := httptest.NewRequest(http.MethodGet, "/readyz", nil) + w := httptest.NewRecorder() + s.readyzHandler().ServeHTTP(w, req) + + if w.Code != http.StatusServiceUnavailable { + t.Errorf("expected status 503, got %d", w.Code) + } + var resp healthResponse + if err := json.NewDecoder(w.Body).Decode(&resp); err != nil { + t.Fatalf("failed to decode response: %v", err) + } + if resp.Status != "no executor configured" { + t.Errorf("expected 'no executor configured', got %q", resp.Status) + } +} + +func TestReadyzHandler_Ready(t *testing.T) { + s := &server{ + health: &healthStatus{}, + getExecutor: func() *executor.ScopedExecutor { return &executor.ScopedExecutor{} }, + } + s.health.alive.Store(true) + s.health.ready.Store(true) + + req := httptest.NewRequest(http.MethodGet, "/readyz", nil) + w := httptest.NewRecorder() + s.readyzHandler().ServeHTTP(w, req) + + if w.Code != http.StatusOK { + t.Errorf("expected status 200, got %d", w.Code) + } + var resp healthResponse + if err := json.NewDecoder(w.Body).Decode(&resp); err != nil { + t.Fatalf("failed to decode response: %v", err) + } + if resp.Status != "ok" { + t.Errorf("expected status 'ok', got %q", resp.Status) + } +} + +func TestReadyzHandler_NilGetExecutor(t *testing.T) { + s := &server{ + health: &healthStatus{}, + getExecutor: nil, + } + s.health.alive.Store(true) + s.health.ready.Store(true) + + req := httptest.NewRequest(http.MethodGet, "/readyz", nil) + w := httptest.NewRecorder() + s.readyzHandler().ServeHTTP(w, req) + + if w.Code != http.StatusServiceUnavailable { + t.Errorf("expected status 503, got %d", w.Code) + } +} + +func TestMarkAlive_NilReceiver(t *testing.T) { + var h *HealthStatus + h.MarkAlive() // should not panic +} + +func TestMarkAlive(t *testing.T) { + h := NewHealthStatus() + if h.IsAlive() { + t.Error("expected not alive initially") + } + h.MarkAlive() + if !h.IsAlive() { + t.Error("expected alive after MarkAlive") + } +} + +func TestMarkReady_NilReceiver(t *testing.T) { + var h *HealthStatus + h.MarkReady() // should not panic +} + +func TestMarkReady(t *testing.T) { + h := NewHealthStatus() + if h.IsReady() { + t.Error("expected not ready initially") + } + h.MarkReady() + if !h.IsReady() { + t.Error("expected ready after MarkReady") + } +} + +func TestIsAlive_NilReceiver(t *testing.T) { + var h *HealthStatus + if h.IsAlive() { + t.Error("expected false for nil receiver") + } +} + +func TestIsReady_NilReceiver(t *testing.T) { + var h *HealthStatus + if h.IsReady() { + t.Error("expected false for nil receiver") + } +} + +func TestAliveChecker_NilReceiver(t *testing.T) { + var h *HealthStatus + checker := h.AliveChecker() + if err := checker.Check(); err == nil { + t.Error("expected error for nil health status") + } +} + +func TestAliveChecker_NotAlive(t *testing.T) { + h := NewHealthStatus() + checker := h.AliveChecker() + if checker.Name() != httpServerAliveCheckerName { + t.Errorf("unexpected checker name: %s", checker.Name()) + } + if err := checker.Check(); err == nil { + t.Error("expected error when not alive") + } +} + +func TestAliveChecker_Alive(t *testing.T) { + h := NewHealthStatus() + h.MarkAlive() + checker := h.AliveChecker() + if err := checker.Check(); err != nil { + t.Errorf("unexpected error: %v", err) + } +} + +func TestExecutorChecker_NilReceiver(t *testing.T) { + var h *HealthStatus + checker := h.ExecutorChecker(func() *executor.ScopedExecutor { return &executor.ScopedExecutor{} }) + if err := checker.Check(); err == nil { + t.Error("expected error for nil health status") + } +} + +func TestExecutorChecker_NotAlive(t *testing.T) { + h := NewHealthStatus() + checker := h.ExecutorChecker(func() *executor.ScopedExecutor { return &executor.ScopedExecutor{} }) + if err := checker.Check(); err == nil { + t.Error("expected error when not alive") + } +} + +func TestExecutorChecker_NilGetter(t *testing.T) { + h := NewHealthStatus() + h.MarkAlive() + checker := h.ExecutorChecker(nil) + if err := checker.Check(); err == nil { + t.Error("expected error when getter is nil") + } +} + +func TestExecutorChecker_NilExecutor(t *testing.T) { + h := NewHealthStatus() + h.MarkAlive() + checker := h.ExecutorChecker(func() *executor.ScopedExecutor { return nil }) + if err := checker.Check(); err == nil { + t.Error("expected error when executor is nil") + } +} + +func TestExecutorChecker_Success(t *testing.T) { + h := NewHealthStatus() + h.MarkAlive() + checker := h.ExecutorChecker(func() *executor.ScopedExecutor { return &executor.ScopedExecutor{} }) + if err := checker.Check(); err != nil { + t.Errorf("unexpected error: %v", err) + } +} diff --git a/internal/httpserver/server.go b/internal/httpserver/server.go index 31cbca173..59fd7db02 100644 --- a/internal/httpserver/server.go +++ b/internal/httpserver/server.go @@ -31,6 +31,7 @@ import ( "github.com/notaryproject/ratify/v2/internal/cache/ristretto" "github.com/notaryproject/ratify/v2/internal/controller" "github.com/notaryproject/ratify/v2/internal/executor" + "github.com/notaryproject/ratify/v2/internal/healthprobe" "github.com/notaryproject/ratify/v2/internal/httpserver/config" "github.com/notaryproject/ratify/v2/internal/httpserver/tlssecret" "github.com/sirupsen/logrus" @@ -55,6 +56,7 @@ type server struct { mutateCache cache.Cache[string] verifyCache cache.Cache[*result] sfGroup *singleflight.Group + health *HealthStatus ServerOptions } @@ -106,11 +108,20 @@ type ServerOptions struct { // certificates. // Optional. CertRotatorReady chan struct{} + + // HealthRegistry holds liveness and readiness checks for the dedicated + // health probe server. + // Optional. + HealthRegistry *healthprobe.Registry } // StartServer initializes and starts the Ratify server with provided options // and configuration file path. func StartServer(opts *ServerOptions, executorConfigPath string) error { + if opts == nil { + return fmt.Errorf("server options are required") + } + server, configWatcher, err := newServer(opts, executorConfigPath) if err != nil { logrus.Errorf("Failed to create server: %v", err) @@ -122,6 +133,10 @@ func StartServer(opts *ServerOptions, executorConfigPath string) error { } func newServer(serverOpts *ServerOptions, executorConfigPath string) (*server, *config.Watcher, error) { + if serverOpts == nil { + return nil, nil, fmt.Errorf("server options are required") + } + var configWatcher *config.Watcher var getExecutorFunc func() *executor.ScopedExecutor var err error @@ -151,6 +166,7 @@ func newServer(serverOpts *ServerOptions, executorConfigPath string) (*server, * verifyCache: verifyCache, sfGroup: new(singleflight.Group), getExecutor: getExecutorFunc, + health: NewHealthStatus(), ServerOptions: *serverOpts, } if server.VerifyTimeout == 0 { @@ -160,12 +176,28 @@ func newServer(serverOpts *ServerOptions, executorConfigPath string) (*server, * server.MutateTimeout = defaultMutateTimeout } + if err := server.registerHealthChecks(); err != nil { + return nil, nil, fmt.Errorf("failed to register health checks: %w", err) + } if err := server.registerHandlers(); err != nil { return nil, nil, fmt.Errorf("failed to register handlers: %w", err) } return server, configWatcher, nil } +func (s *server) registerHealthChecks() error { + if s == nil || s.HealthRegistry == nil || s.health == nil { + return nil + } + if err := s.HealthRegistry.RegisterLiveness(s.health.AliveChecker()); err != nil { + return err + } + if err := s.HealthRegistry.RegisterReadiness(s.health.ExecutorChecker(s.getExecutor)); err != nil { + return err + } + return nil +} + func (s *server) registerHandlers() error { if err := s.registerVerifyHandler(); err != nil { return err @@ -229,8 +261,17 @@ func (s *server) Run(certRotatorReady chan struct{}, configWatcher *config.Watch IdleTimeout: idleTimeout, } go func() { - // Start the configuration watcher (if any) and ensure - // it is properly stopped when the server goroutine exits. + for { + if s.getExecutor() != nil { + s.health.MarkReady() + logrus.Info("server is ready: executor loaded") + return + } + time.Sleep(100 * time.Millisecond) + } + }() + + go func() { if configWatcher != nil { if err := configWatcher.Start(); err != nil { logrus.WithError(err).Error("failed to start config watcher") @@ -257,6 +298,8 @@ func (s *server) Run(certRotatorReady chan struct{}, configWatcher *config.Watch } defer certWatcher.Stop() + s.health.MarkAlive() + // Use GetConfigForClient to dynamically load certificates. srv.TLSConfig = &tls.Config{ MinVersion: tls.VersionTLS13, @@ -267,13 +310,13 @@ func (s *server) Run(certRotatorReady chan struct{}, configWatcher *config.Watch } } else { logrus.Infof("starting server without TLS at %s", s.HTTPServerAddress) + s.health.MarkAlive() if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed { logrus.Errorf("failed to start server: %v", err) } } }() - // Handle graceful shutdown. quit := make(chan os.Signal, 1) signal.Notify(quit, os.Interrupt, syscall.SIGTERM) <-quit diff --git a/internal/manager/manager.go b/internal/manager/manager.go index cdbfa0fc5..96e05bceb 100644 --- a/internal/manager/manager.go +++ b/internal/manager/manager.go @@ -16,6 +16,7 @@ limitations under the License. package manager import ( + "context" "crypto/x509" "fmt" "os" @@ -48,12 +49,31 @@ func init() { utilruntime.Must(v2alpha1.AddToScheme(scheme)) } +type managerReadyRunnable struct { + mgr ctrl.Manager + ready *ReadySignal +} + +func (r managerReadyRunnable) Start(ctx context.Context) error { + if r.ready == nil { + <-ctx.Done() + return nil + } + if !r.mgr.GetCache().WaitForCacheSync(ctx) { + return fmt.Errorf("manager cache failed to sync") + } + r.ready.MarkReady() + <-ctx.Done() + return nil +} + // StartManager creates a new Manager which is responsible for creating // Controllers. -func StartManager(certRotatorReady chan struct{}, disableMutation bool, disableCRDManager bool) { +func StartManager(certRotatorReady chan struct{}, managerReady *ReadySignal, disableMutation bool, disableCRDManager bool) { ctrl.SetLogger(logrusr.New(logrus.StandardLogger())) mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{ - Scheme: scheme, + Scheme: scheme, + HealthProbeBindAddress: "0", }) if err != nil { setupLog.Error(err, "could not create ratify manager") @@ -63,6 +83,13 @@ func StartManager(certRotatorReady chan struct{}, disableMutation bool, disableC setupCertRotator(certRotatorReady, mgr, disableMutation) setupCRDControllers(mgr, disableCRDManager) + if managerReady != nil { + if err := mgr.Add(managerReadyRunnable{mgr: mgr, ready: managerReady}); err != nil { + setupLog.Error(err, "could not register manager readiness runnable") + os.Exit(1) + } + } + if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil { setupLog.Error(err, "could not start manager") os.Exit(1) diff --git a/internal/manager/ready.go b/internal/manager/ready.go new file mode 100644 index 000000000..5700542a5 --- /dev/null +++ b/internal/manager/ready.go @@ -0,0 +1,84 @@ +/* +Copyright The Ratify Authors. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package manager + +import ( + "fmt" + "sync" + + "github.com/notaryproject/ratify/v2/internal/healthprobe" +) + +const managerCheckerName = "controller-runtime-manager" + +// ReadySignal tracks when the controller-runtime manager has started. +type ReadySignal struct { + once sync.Once + ready chan struct{} +} + +// NewReadySignal creates a manager readiness signal. +func NewReadySignal() *ReadySignal { + return &ReadySignal{ + ready: make(chan struct{}), + } +} + +// Done returns a channel that closes once the manager is ready. +func (r *ReadySignal) Done() <-chan struct{} { + if r == nil { + return nil + } + return r.ready +} + +// MarkReady closes the readiness signal once. +func (r *ReadySignal) MarkReady() { + if r == nil || r.ready == nil { + return + } + + r.once.Do(func() { + close(r.ready) + }) +} + +// IsReady reports whether the manager readiness signal has fired. +func (r *ReadySignal) IsReady() bool { + if r == nil || r.ready == nil { + return false + } + + select { + case <-r.ready: + return true + default: + return false + } +} + +// Checker returns a readiness check for the controller-runtime manager. +func (r *ReadySignal) Checker() healthprobe.HealthChecker { + return healthprobe.MustNewChecker(managerCheckerName, func() error { + if r == nil { + return fmt.Errorf("manager readiness signal is nil") + } + if !r.IsReady() { + return fmt.Errorf("controller-runtime manager is not ready") + } + return nil + }) +} diff --git a/internal/manager/ready_test.go b/internal/manager/ready_test.go new file mode 100644 index 000000000..c69ad7677 --- /dev/null +++ b/internal/manager/ready_test.go @@ -0,0 +1,106 @@ +/* +Copyright The Ratify Authors. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package manager + +import ( + "testing" + "time" +) + +func TestNewReadySignal(t *testing.T) { + rs := NewReadySignal() + if rs == nil { + t.Fatal("expected non-nil ReadySignal") + } + if rs.IsReady() { + t.Fatal("new ReadySignal should not be ready") + } +} + +func TestReadySignal_MarkReady(t *testing.T) { + rs := NewReadySignal() + rs.MarkReady() + if !rs.IsReady() { + t.Fatal("expected IsReady() to be true after MarkReady()") + } +} + +func TestReadySignal_MarkReadyIdempotent(t *testing.T) { + rs := NewReadySignal() + rs.MarkReady() + rs.MarkReady() // should not panic + if !rs.IsReady() { + t.Fatal("expected IsReady() to be true") + } +} + +func TestReadySignal_DoneChannel(t *testing.T) { + rs := NewReadySignal() + + select { + case <-rs.Done(): + t.Fatal("Done channel should not be closed before MarkReady") + default: + } + + rs.MarkReady() + + select { + case <-rs.Done(): + // expected + case <-time.After(time.Second): + t.Fatal("Done channel should be closed after MarkReady") + } +} + +func TestReadySignal_NilReceiver(t *testing.T) { + var rs *ReadySignal + if rs.IsReady() { + t.Fatal("nil receiver IsReady() should return false") + } + if rs.Done() != nil { + t.Fatal("nil receiver Done() should return nil") + } + // Should not panic + rs.MarkReady() +} + +func TestReadySignal_Checker_NotReady(t *testing.T) { + rs := NewReadySignal() + checker := rs.Checker() + + if checker.Name() != managerCheckerName { + t.Fatalf("expected checker name %q, got %q", managerCheckerName, checker.Name()) + } + + err := checker.Check() + if err == nil { + t.Fatal("expected error when not ready") + } + if err.Error() != "controller-runtime manager is not ready" { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestReadySignal_Checker_Ready(t *testing.T) { + rs := NewReadySignal() + rs.MarkReady() + checker := rs.Checker() + + if err := checker.Check(); err != nil { + t.Fatalf("expected no error when ready, got %v", err) + } +}