diff --git a/Makefile b/Makefile index ae8f84f..316b6eb 100644 --- a/Makefile +++ b/Makefile @@ -25,6 +25,7 @@ generate-configuration: # Format: example_path::observed_resources_path (observed_resources_path is optional) EXAMPLES := \ examples/knativestacks/minimal.yaml:: \ + examples/knativestacks/nodepool.yaml:: \ examples/knativestacks/standard.yaml:: # Render all examples (parallel execution, output shown per-job when complete) diff --git a/apis/knativestacks/definition.yaml b/apis/knativestacks/definition.yaml index aa9e758..778131c 100644 --- a/apis/knativestacks/definition.yaml +++ b/apis/knativestacks/definition.yaml @@ -102,6 +102,34 @@ spec: enum: - ProviderConfig - ClusterProviderConfig + nodePool: + description: Optional dedicated Karpenter NodePool for Knative and NATS workloads. When enabled, non-DaemonSet pods are scheduled with workload-type=knative and tolerate knative=true:NoSchedule. + type: object + properties: + enabled: + description: Whether to create a dedicated NodePool. Defaults to false. + type: boolean + default: false + name: + description: NodePool name on the target cluster. Defaults to "hops-knative". + type: string + nodeClassName: + description: EKS Auto Mode NodeClass to reference. Defaults to "hops-default". + type: string + limits: + description: Karpenter NodePool resource limits. Defaults to nodes=10. + type: object + x-kubernetes-preserve-unknown-fields: true + requirements: + description: Karpenter scheduling requirements. Defaults to amd64/linux spot or on-demand nodes. + type: array + items: + type: object + x-kubernetes-preserve-unknown-fields: true + disruption: + description: Karpenter disruption settings. Defaults to WhenEmptyOrUnderutilized after 60s. + type: object + x-kubernetes-preserve-unknown-fields: true knativeOperator: description: Configuration for the Knative Operator component. type: object diff --git a/examples/knativestacks/nodepool.yaml b/examples/knativestacks/nodepool.yaml new file mode 100644 index 0000000..545ab48 --- /dev/null +++ b/examples/knativestacks/nodepool.yaml @@ -0,0 +1,11 @@ +apiVersion: hops.ops.com.ai/v1alpha1 +kind: KnativeStack +metadata: + name: knative + namespace: default +spec: + clusterName: default + labels: + team: platform + nodePool: + enabled: true diff --git a/functions/render/000-state-init.yaml.gotmpl b/functions/render/000-state-init.yaml.gotmpl index d0afeaa..6c0dc04 100644 --- a/functions/render/000-state-init.yaml.gotmpl +++ b/functions/render/000-state-init.yaml.gotmpl @@ -58,6 +58,38 @@ "kind" ($k8sProviderConfigRef.kind | default "ProviderConfig") }} +# ============================================================================== +# NodePool configuration +# ============================================================================== +{{- $nodePoolSpec := $spec.nodePool | default dict }} +{{- $nodePoolEnabled := false }} +{{- if hasKey $nodePoolSpec "enabled" }} + {{- $nodePoolEnabled = $nodePoolSpec.enabled }} +{{- end }} +{{- $nodePoolName := $nodePoolSpec.name | default "hops-knative" }} +{{- $nodePoolNodeClassName := $nodePoolSpec.nodeClassName | default "hops-default" }} +{{- $nodePoolLimits := $nodePoolSpec.limits | default (dict "nodes" 10) }} +{{- $nodePoolRequirements := $nodePoolSpec.requirements | default (list + (dict "key" "karpenter.sh/capacity-type" "operator" "In" "values" (list "spot" "on-demand")) + (dict "key" "eks.amazonaws.com/instance-category" "operator" "In" "values" (list "c" "m" "r")) + (dict "key" "eks.amazonaws.com/instance-generation" "operator" "Gt" "values" (list "4")) + (dict "key" "eks.amazonaws.com/instance-memory" "operator" "Gt" "values" (list "7999")) + (dict "key" "eks.amazonaws.com/instance-cpu" "operator" "Gt" "values" (list "1")) + (dict "key" "kubernetes.io/arch" "operator" "In" "values" (list "amd64")) + (dict "key" "kubernetes.io/os" "operator" "In" "values" (list "linux")) +) }} +{{- $nodePoolDisruption := $nodePoolSpec.disruption | default (dict "consolidationPolicy" "WhenEmptyOrUnderutilized" "consolidateAfter" "60s") }} +{{- $nodePoolTaintKey := "knative" }} +{{- $nodePoolTaintValue := "true" }} +{{- $nodePoolNodeSelector := dict }} +{{- $nodePoolTolerations := list }} +{{- $nodePoolAffinity := dict }} +{{- if $nodePoolEnabled }} + {{- $nodePoolNodeSelector = dict "workload-type" "knative" }} + {{- $nodePoolTolerations = list (dict "key" $nodePoolTaintKey "operator" "Equal" "value" $nodePoolTaintValue "effect" "NoSchedule") }} + {{- $nodePoolAffinity = dict "nodeAffinity" (dict "requiredDuringSchedulingIgnoredDuringExecution" (dict "nodeSelectorTerms" (list (dict "matchExpressions" (list (dict "key" "workload-type" "operator" "In" "values" (list "knative"))))))) }} +{{- end }} + # ============================================================================== # Per-component defaults # ============================================================================== @@ -149,6 +181,35 @@ (dict "URL" "https://github.com/knative-extensions/net-gateway-api/releases/download/knative-v1.21.0/release.yaml") ) }} +{{- $servingDeployments := list }} +{{- range $deploymentDefaults := list + (dict "name" "activator" "container" "activator" "cpu" "15m" "memory" "100Mi") + (dict "name" "autoscaler" "container" "autoscaler" "cpu" "15m" "memory" "100Mi") + (dict "name" "autoscaler-hpa" "container" "autoscaler-hpa" "cpu" "15m" "memory" "100Mi") + (dict "name" "controller" "container" "controller" "cpu" "15m" "memory" "100Mi") + (dict "name" "webhook" "container" "webhook" "cpu" "15m" "memory" "100Mi") + (dict "name" "net-gateway-api-controller" "container" "controller" "cpu" "15m" "memory" "100Mi") + (dict "name" "net-gateway-api-webhook" "container" "webhook" "cpu" "15m" "memory" "100Mi") + (dict "name" "net-istio-controller" "container" "controller" "cpu" "15m" "memory" "100Mi") + (dict "name" "net-istio-webhook" "container" "webhook" "cpu" "15m" "memory" "100Mi") +}} + {{- $deployment := dict + "name" $deploymentDefaults.name + "resources" (list + (dict + "container" $deploymentDefaults.container + "requests" (dict "cpu" $deploymentDefaults.cpu "memory" $deploymentDefaults.memory) + "limits" (dict "memory" "256Mi") + ) + ) + }} + {{- if $nodePoolEnabled }} + {{- $_ := set $deployment "nodeSelector" $nodePoolNodeSelector }} + {{- $_ := set $deployment "tolerations" $nodePoolTolerations }} + {{- end }} + {{- $servingDeployments = append $servingDeployments $deployment }} +{{- end }} +{{- $_ := set $natsKnServingDefaults "deployments" $servingDeployments }} {{- $servingSpec := merge ($knServing.spec | default dict) $natsKnServingDefaults }} # ============================================================================== @@ -193,7 +254,7 @@ "container" (dict "merge" (dict "resources" (dict - "requests" (dict "cpu" "100m" "memory" "256Mi") + "requests" (dict "cpu" "11m" "memory" "50Mi") "limits" (dict "cpu" "500m" "memory" "512Mi") ) ) @@ -201,12 +262,32 @@ "reloader" (dict "merge" (dict "resources" (dict - "requests" (dict "cpu" "10m" "memory" "32Mi") + "requests" (dict "cpu" "11m" "memory" "50Mi") "limits" (dict "cpu" "50m" "memory" "64Mi") ) ) ) + "natsBox" (dict + "container" (dict + "merge" (dict + "resources" (dict + "requests" (dict "cpu" "15m" "memory" "100Mi") + "limits" (dict "cpu" "100m" "memory" "256Mi") + ) + ) + ) + ) }} +{{- if $nodePoolEnabled }} + {{- $_ := set $natsDefaultValues "podTemplate" (dict + "merge" (dict + "spec" (dict + "nodeSelector" $nodePoolNodeSelector + "tolerations" $nodePoolTolerations + ) + ) + ) }} +{{- end }} {{- $natsValues := mergeOverwrite (deepCopy $natsDefaultValues) ($nats.values | default dict) }} # ============================================================================== @@ -216,20 +297,37 @@ # mTLS at the node level — eventing doesn't need to know about the mesh. {{- /* eventing-webhook ships with a 200Mi limit that OOMKills under modest ApiServerSource / Trigger reconcile load. Bump to 512Mi so it stays up. */}} -{{- $eventingDefaults := dict - "config" dict - "deployments" (list - (dict - "name" "eventing-webhook" - "resources" (list - (dict - "container" "eventing-webhook" - "requests" (dict "memory" "100Mi") - "limits" (dict "memory" "512Mi") - ) +{{- $eventingDeployments := list }} +{{- range $deployment := list + (dict "name" "eventing-controller" "container" "eventing-controller" "cpu" "15m" "memory" "284Mi" "limitMemory" "512Mi") + (dict "name" "eventing-webhook" "container" "eventing-webhook" "cpu" "15m" "memory" "309Mi" "limitMemory" "512Mi") + (dict "name" "imc-controller" "container" "controller" "cpu" "23m" "memory" "100Mi" "limitMemory" "256Mi") + (dict "name" "imc-dispatcher" "container" "dispatcher" "cpu" "15m" "memory" "100Mi" "limitMemory" "256Mi") + (dict "name" "job-sink" "container" "job-sink" "cpu" "15m" "memory" "100Mi" "limitMemory" "256Mi") + (dict "name" "mt-broker-controller" "container" "mt-broker-controller" "cpu" "15m" "memory" "100Mi" "limitMemory" "256Mi") + (dict "name" "mt-broker-filter" "container" "filter" "cpu" "15m" "memory" "100Mi" "limitMemory" "256Mi") + (dict "name" "mt-broker-ingress" "container" "ingress" "cpu" "15m" "memory" "100Mi" "limitMemory" "256Mi") + (dict "name" "request-reply" "container" "request-reply" "cpu" "15m" "memory" "100Mi" "limitMemory" "256Mi") +}} + {{- $eventingDeployment := dict + "name" $deployment.name + "resources" (list + (dict + "container" $deployment.container + "requests" (dict "cpu" $deployment.cpu "memory" $deployment.memory) + "limits" (dict "memory" $deployment.limitMemory) ) ) - ) + }} + {{- if $nodePoolEnabled }} + {{- $_ := set $eventingDeployment "nodeSelector" $nodePoolNodeSelector }} + {{- $_ := set $eventingDeployment "tolerations" $nodePoolTolerations }} + {{- end }} + {{- $eventingDeployments = append $eventingDeployments $eventingDeployment }} +{{- end }} +{{- $eventingDefaults := dict + "config" dict + "deployments" $eventingDeployments }} {{- if $natsEnabled }} {{- $natsNs := $nats.namespace | default "nats" }} @@ -257,6 +355,19 @@ "autoTls" (dict "enabled" $autoTlsEnabled "namespaceSelector" $autoTlsSelector) "helmProviderConfigRef" $helmProviderConfigRef "kubernetesProviderConfigRef" $k8sProviderConfigRef + "nodePool" (dict + "enabled" $nodePoolEnabled + "name" $nodePoolName + "nodeClassName" $nodePoolNodeClassName + "limits" $nodePoolLimits + "requirements" $nodePoolRequirements + "disruption" $nodePoolDisruption + "taintKey" $nodePoolTaintKey + "taintValue" $nodePoolTaintValue + "nodeSelector" $nodePoolNodeSelector + "tolerations" $nodePoolTolerations + "affinity" $nodePoolAffinity + ) "knativeOperator" (dict "name" ($knOp.name | default "knative-operator") "namespace" ($knOp.namespace | default "knative-operator") diff --git a/functions/render/155-nodepool.yaml.gotmpl b/functions/render/155-nodepool.yaml.gotmpl new file mode 100644 index 0000000..408ec94 --- /dev/null +++ b/functions/render/155-nodepool.yaml.gotmpl @@ -0,0 +1,91 @@ +# code: language=yaml +# +# Optional Karpenter NodePool for Knative and NATS workloads. +# + +{{- if $state.nodePool.enabled }} +--- +apiVersion: kubernetes.m.crossplane.io/v1alpha1 +kind: Object +metadata: + name: {{ $state.name }}-nodepool-knative + annotations: + {{ setResourceNameAnnotation "nodepool-knative" }} + labels: {{ $state.labels | toJson }} +spec: + managementPolicies: {{ $state.managementPolicies | toJson }} + forProvider: + manifest: + apiVersion: karpenter.sh/v1 + kind: NodePool + metadata: + name: {{ $state.nodePool.name }} + spec: + template: + metadata: + labels: + workload-type: knative + spec: + nodeClassRef: + group: eks.amazonaws.com + kind: NodeClass + name: {{ $state.nodePool.nodeClassName }} + taints: + - key: {{ $state.nodePool.taintKey }} + value: {{ $state.nodePool.taintValue | quote }} + effect: NoSchedule + requirements: {{ $state.nodePool.requirements | toJson }} + limits: {{ $state.nodePool.limits | toJson }} + disruption: {{ $state.nodePool.disruption | toJson }} + providerConfigRef: + name: {{ $state.kubernetesProviderConfigRef.name }} + kind: {{ $state.kubernetesProviderConfigRef.kind }} + +--- +apiVersion: protection.crossplane.io/v1beta1 +kind: Usage +metadata: + name: {{ $state.name }}-delete-operator-before-nodepool + annotations: + {{ setResourceNameAnnotation "usage-operator-before-nodepool" }} + labels: {{ $state.labels | toJson }} +spec: + replayDeletion: true + of: + apiVersion: kubernetes.m.crossplane.io/v1alpha1 + kind: Object + resourceRef: + name: {{ $state.name }}-nodepool-knative + by: + apiVersion: helm.m.crossplane.io/v1beta1 + kind: Release + resourceRef: + name: {{ $state.name }}-{{ $state.knativeOperator.name }} + +{{- $natsNodePoolUsageEnabled := $state.nats.enabled }} +{{- if and $state.nats.enabled $state.nats.storageClass.enabled (not $state.observed.natsStorageClass.ready) }} + {{- $natsNodePoolUsageEnabled = false }} +{{- end }} +{{- if $natsNodePoolUsageEnabled }} +--- +apiVersion: protection.crossplane.io/v1beta1 +kind: Usage +metadata: + name: {{ $state.name }}-delete-nats-before-nodepool + annotations: + {{ setResourceNameAnnotation "usage-nats-before-nodepool" }} + labels: {{ $state.labels | toJson }} +spec: + replayDeletion: true + of: + apiVersion: kubernetes.m.crossplane.io/v1alpha1 + kind: Object + resourceRef: + name: {{ $state.name }}-nodepool-knative + by: + apiVersion: helm.m.crossplane.io/v1beta1 + kind: Release + resourceRef: + name: {{ $state.name }}-{{ $state.nats.name }} +{{- end }} +{{- end }} diff --git a/functions/render/210-knative-operator.yaml.gotmpl b/functions/render/210-knative-operator.yaml.gotmpl index b10f9d8..17a7965 100644 --- a/functions/render/210-knative-operator.yaml.gotmpl +++ b/functions/render/210-knative-operator.yaml.gotmpl @@ -4,6 +4,42 @@ # {{- $knOp := $state.knativeOperator }} +{{- $operatorComponent := dict + "resources" (dict + "requests" (dict "cpu" "15m" "memory" "100Mi") + "limits" (dict "cpu" "1000m" "memory" "512Mi") + ) +}} +{{- $webhookComponent := dict + "resources" (dict + "requests" (dict "cpu" "15m" "memory" "284Mi") + "limits" (dict "cpu" "500m" "memory" "512Mi") + ) +}} +{{- if $state.nodePool.enabled }} + {{- $_ := set $operatorComponent "affinity" $state.nodePool.affinity }} + {{- $_ := set $operatorComponent "tolerations" $state.nodePool.tolerations }} + {{- $webhookAffinity := deepCopy $state.nodePool.affinity }} + {{- $_ := set $webhookAffinity "podAntiAffinity" (dict + "preferredDuringSchedulingIgnoredDuringExecution" (list + (dict + "weight" 100 + "podAffinityTerm" (dict + "labelSelector" (dict "matchLabels" (dict "app" "operator-webhook")) + "topologyKey" "kubernetes.io/hostname" + ) + ) + ) + ) }} + {{- $_ := set $webhookComponent "affinity" $webhookAffinity }} + {{- $_ := set $webhookComponent "tolerations" $state.nodePool.tolerations }} +{{- end }} +{{- $operatorDefaults := dict + "knative_operator" (dict + "knative_operator" $operatorComponent + "operator_webhook" $webhookComponent + ) +}} --- apiVersion: helm.m.crossplane.io/v1beta1 @@ -26,9 +62,10 @@ spec: {{- if $knOp.overrideAllValues }} values: {{- toYaml $knOp.overrideAllValues | nindent 6 }} - {{- else if $knOp.values }} + {{- else }} + {{- $mergedValues := mergeOverwrite $operatorDefaults ($knOp.values | default dict) }} values: - {{- toYaml $knOp.values | nindent 6 }} + {{- toYaml $mergedValues | nindent 6 }} {{- end }} rollbackLimit: 3 providerConfigRef: diff --git a/tests/test-knative/main.k b/tests/test-knative/main.k index 24642b2..ca11b6b 100644 --- a/tests/test-knative/main.k +++ b/tests/test-knative/main.k @@ -56,6 +56,20 @@ _items = [ name = "my-cluster" kind = "ProviderConfig" } + spec.forProvider.values = { + container.merge.resources.requests = { + cpu = "11m" + memory = "50Mi" + } + reloader.merge.resources.requests = { + cpu = "11m" + memory = "50Mi" + } + natsBox.container.merge.resources.requests = { + cpu = "15m" + memory = "100Mi" + } + } } # Serving namespace renders by default { @@ -356,6 +370,44 @@ _items = [ apiVersion = "kubernetes.m.crossplane.io/v1alpha1" kind = "Object" metadata.name = "test-observed-knative-serving" + spec.forProvider.manifest.spec.deployments = [ + { + name = "activator" + resources = [{container = "activator", requests = {cpu = "15m", memory = "100Mi"}, limits = {memory = "256Mi"}}] + } + { + name = "autoscaler" + resources = [{container = "autoscaler", requests = {cpu = "15m", memory = "100Mi"}, limits = {memory = "256Mi"}}] + } + { + name = "autoscaler-hpa" + resources = [{container = "autoscaler-hpa", requests = {cpu = "15m", memory = "100Mi"}, limits = {memory = "256Mi"}}] + } + { + name = "controller" + resources = [{container = "controller", requests = {cpu = "15m", memory = "100Mi"}, limits = {memory = "256Mi"}}] + } + { + name = "webhook" + resources = [{container = "webhook", requests = {cpu = "15m", memory = "100Mi"}, limits = {memory = "256Mi"}}] + } + { + name = "net-gateway-api-controller" + resources = [{container = "controller", requests = {cpu = "15m", memory = "100Mi"}, limits = {memory = "256Mi"}}] + } + { + name = "net-gateway-api-webhook" + resources = [{container = "webhook", requests = {cpu = "15m", memory = "100Mi"}, limits = {memory = "256Mi"}}] + } + { + name = "net-istio-controller" + resources = [{container = "controller", requests = {cpu = "15m", memory = "100Mi"}, limits = {memory = "256Mi"}}] + } + { + name = "net-istio-webhook" + resources = [{container = "webhook", requests = {cpu = "15m", memory = "100Mi"}, limits = {memory = "256Mi"}}] + } + ] } # PeerAuthentication: dropped (ambient-mode rebuild — mTLS lives in ztunnel) # Eventing Object renders when operator + namespace ready @@ -363,6 +415,44 @@ _items = [ apiVersion = "kubernetes.m.crossplane.io/v1alpha1" kind = "Object" metadata.name = "test-observed-knative-eventing" + spec.forProvider.manifest.spec.deployments = [ + { + name = "eventing-controller" + resources = [{container = "eventing-controller", requests = {cpu = "15m", memory = "284Mi"}, limits = {memory = "512Mi"}}] + } + { + name = "eventing-webhook" + resources = [{container = "eventing-webhook", requests = {cpu = "15m", memory = "309Mi"}, limits = {memory = "512Mi"}}] + } + { + name = "imc-controller" + resources = [{container = "controller", requests = {cpu = "23m", memory = "100Mi"}, limits = {memory = "256Mi"}}] + } + { + name = "imc-dispatcher" + resources = [{container = "dispatcher", requests = {cpu = "15m", memory = "100Mi"}, limits = {memory = "256Mi"}}] + } + { + name = "job-sink" + resources = [{container = "job-sink", requests = {cpu = "15m", memory = "100Mi"}, limits = {memory = "256Mi"}}] + } + { + name = "mt-broker-controller" + resources = [{container = "mt-broker-controller", requests = {cpu = "15m", memory = "100Mi"}, limits = {memory = "256Mi"}}] + } + { + name = "mt-broker-filter" + resources = [{container = "filter", requests = {cpu = "15m", memory = "100Mi"}, limits = {memory = "256Mi"}}] + } + { + name = "mt-broker-ingress" + resources = [{container = "ingress", requests = {cpu = "15m", memory = "100Mi"}, limits = {memory = "256Mi"}}] + } + { + name = "request-reply" + resources = [{container = "request-reply", requests = {cpu = "15m", memory = "100Mi"}, limits = {memory = "256Mi"}}] + } + ] } ] } @@ -558,5 +648,95 @@ _items = [ ] } } + + # ========================================================================= + # Test 11: nodePool adds operator/NATS placement and renders NodePool. + # ========================================================================= + metav1alpha1.CompositionTest { + metadata.name = "nodepool-adds-operator-nats-placement" + spec = { + compositionPath = _compositionPath + xrdPath = _xrdPath + timeoutSeconds = 60 + validate = False + xr = hopsv1alpha1.KnativeStack { + metadata.name = "test-nodepool" + spec = { + clusterName = "my-cluster" + nodePool.enabled = True + } + } + observedResources = [ + { + apiVersion = "kubernetes.m.crossplane.io/v1alpha1" + kind = "Object" + metadata = { + name = "test-nodepool-knative-nats-sc" + annotations = {"crossplane.io/composition-resource-name" = "nats-storage-class"} + } + status.conditions = [ + {type = "Ready", status = "True"} + {type = "Synced", status = "True"} + ] + } + ] + assertResources = [ + { + apiVersion = "helm.m.crossplane.io/v1beta1" + kind = "Release" + metadata.name = "test-nodepool-knative-operator" + spec.forProvider.values.knative_operator.knative_operator = { + affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms = [ + {matchExpressions = [{key = "workload-type", operator = "In", values = ["knative"]}]} + ] + tolerations = [ + { + key = "knative" + operator = "Equal" + value = "true" + effect = "NoSchedule" + } + ] + resources.requests = { + cpu = "15m" + memory = "100Mi" + } + } + } + { + apiVersion = "helm.m.crossplane.io/v1beta1" + kind = "Release" + metadata.name = "test-nodepool-nats" + spec.forProvider.values.podTemplate.merge.spec = { + nodeSelector = {"workload-type" = "knative"} + tolerations = [ + { + key = "knative" + operator = "Equal" + value = "true" + effect = "NoSchedule" + } + ] + } + } + { + apiVersion = "kubernetes.m.crossplane.io/v1alpha1" + kind = "Object" + metadata.name = "test-nodepool-nodepool-knative" + spec.forProvider.manifest = { + apiVersion = "karpenter.sh/v1" + kind = "NodePool" + metadata.name = "hops-knative" + spec.template.metadata.labels = {"workload-type" = "knative"} + } + } + { + apiVersion = "protection.crossplane.io/v1beta1" + kind = "Usage" + metadata.name = "test-nodepool-delete-nats-before-nodepool" + } + ] + } + } ] items = _items