From 58f83d20fc2b6fdaeeeb0bccb118e4b1e18252d9 Mon Sep 17 00:00:00 2001 From: sallycr Date: Mon, 4 May 2026 22:19:59 -0700 Subject: [PATCH] feat: add helm/michelangelo Helm chart for control plane deployment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: Intent: - Convert the Michelangelo control plane from raw kubectl apply calls in sandbox.py to a first-class Helm chart installable against any Kubernetes cluster (closes #1136) - Enable helm install for local k3d development and standard --set overrides for production and staging environments Changes: - Add helm/michelangelo/ chart with Chart.yaml, values.yaml, values-k3d.yaml, and 20 templates covering all 5 control plane services (apiserver, envoy, ui, worker, controllermgr) promoted to Deployments - Add schema init containers on apiserver (wait-for-metadata-storage + schema-init) eliminating the ordering race condition in sandbox.py - Replace boot.yaml cluster-admin with a least-privilege ClusterRole scoped to what controllermgr and apiserver crdSync actually need - Rename minio-credentials to object-storage-credentials in chart templates - Add per-service enabled toggles, Cadence/Temporal engine guards with fail-fast validation, and helm test hook Test Plan: - helm lint → 0 errors, 0 failures - helm template with no values → fails fast with clear required-value error - helm template -f values-k3d.yaml → 21 resources render clean - helm template with full production values (Temporal) → 21 resources render clean - helm install -f values-k3d.yaml against live k3d → all 5 pods Running - helm test michelangelo → Phase: Succeeded - helm upgrade --reuse-values → zero pod restarts - helm uninstall → credential Secrets survive (resource-policy: keep) Revert Plan: - Revert this PR via git revert. The helm/ directory is additive and sandbox.py is unchanged — no production behavior is affected. 
Closes #1136 --- helm/michelangelo/Chart.yaml | 60 ++ helm/michelangelo/README.md | 178 ++++++ helm/michelangelo/crds/.gitkeep | 0 helm/michelangelo/crds/README.md | 12 + .../files/schema/mysql-init-schema.sql | 578 ++++++++++++++++++ helm/michelangelo/templates/NOTES.txt | 90 +++ helm/michelangelo/templates/_helpers.tpl | 107 ++++ .../templates/core/apiserver-configmap.yaml | 22 + .../templates/core/apiserver-deployment.yaml | 145 +++++ .../core/apiserver-schema-init-configmap.yaml | 12 + .../templates/core/apiserver-service.yaml | 20 + .../core/controllermgr-configmap.yaml | 71 +++ .../core/controllermgr-deployment.yaml | 75 +++ .../templates/core/controllermgr-service.yaml | 20 + .../templates/core/envoy-configmap.yaml | 67 ++ .../templates/core/envoy-deployment.yaml | 64 ++ .../templates/core/envoy-service.yaml | 20 + .../core/metadata-storage-secret.yaml | 14 + .../templates/core/object-storage-secret.yaml | 15 + .../templates/core/ui-configmap.yaml | 14 + .../templates/core/ui-deployment.yaml | 64 ++ .../templates/core/ui-service.yaml | 20 + .../templates/core/worker-configmap.yaml | 44 ++ .../templates/core/worker-deployment.yaml | 62 ++ .../templates/rbac/clusterrole.yaml | 171 ++++++ .../templates/rbac/clusterrolebinding.yaml | 35 ++ .../templates/rbac/serviceaccount.yaml | 13 + .../templates/tests/test-connection.yaml | 25 + helm/michelangelo/values-k3d.yaml | 69 +++ helm/michelangelo/values.yaml | 316 ++++++++++ 30 files changed, 2403 insertions(+) create mode 100644 helm/michelangelo/Chart.yaml create mode 100644 helm/michelangelo/README.md create mode 100644 helm/michelangelo/crds/.gitkeep create mode 100644 helm/michelangelo/crds/README.md create mode 100644 helm/michelangelo/files/schema/mysql-init-schema.sql create mode 100644 helm/michelangelo/templates/NOTES.txt create mode 100644 helm/michelangelo/templates/_helpers.tpl create mode 100644 helm/michelangelo/templates/core/apiserver-configmap.yaml create mode 100644 
helm/michelangelo/templates/core/apiserver-deployment.yaml create mode 100644 helm/michelangelo/templates/core/apiserver-schema-init-configmap.yaml create mode 100644 helm/michelangelo/templates/core/apiserver-service.yaml create mode 100644 helm/michelangelo/templates/core/controllermgr-configmap.yaml create mode 100644 helm/michelangelo/templates/core/controllermgr-deployment.yaml create mode 100644 helm/michelangelo/templates/core/controllermgr-service.yaml create mode 100644 helm/michelangelo/templates/core/envoy-configmap.yaml create mode 100644 helm/michelangelo/templates/core/envoy-deployment.yaml create mode 100644 helm/michelangelo/templates/core/envoy-service.yaml create mode 100644 helm/michelangelo/templates/core/metadata-storage-secret.yaml create mode 100644 helm/michelangelo/templates/core/object-storage-secret.yaml create mode 100644 helm/michelangelo/templates/core/ui-configmap.yaml create mode 100644 helm/michelangelo/templates/core/ui-deployment.yaml create mode 100644 helm/michelangelo/templates/core/ui-service.yaml create mode 100644 helm/michelangelo/templates/core/worker-configmap.yaml create mode 100644 helm/michelangelo/templates/core/worker-deployment.yaml create mode 100644 helm/michelangelo/templates/rbac/clusterrole.yaml create mode 100644 helm/michelangelo/templates/rbac/clusterrolebinding.yaml create mode 100644 helm/michelangelo/templates/rbac/serviceaccount.yaml create mode 100644 helm/michelangelo/templates/tests/test-connection.yaml create mode 100644 helm/michelangelo/values-k3d.yaml create mode 100644 helm/michelangelo/values.yaml diff --git a/helm/michelangelo/Chart.yaml b/helm/michelangelo/Chart.yaml new file mode 100644 index 000000000..cc032abb9 --- /dev/null +++ b/helm/michelangelo/Chart.yaml @@ -0,0 +1,60 @@ +apiVersion: v2 +name: michelangelo +description: | + Michelangelo is an open-source ML platform for orchestrating data, training, + and serving pipelines on Kubernetes. 
This chart installs the control plane — + apiserver, gRPC-Web proxy, UI, workflow worker, controller manager, CRDs, + and RBAC — into any Kubernetes cluster. Bring your own metadata storage, + object storage, and workflow engine (Cadence or Temporal). +type: application +version: 0.1.0 +appVersion: "0.2.1" +kubeVersion: ">=1.27.0-0" + +home: https://github.com/michelangelo-ai/michelangelo +icon: https://raw.githubusercontent.com/michelangelo-ai/michelangelo/main/docs/assets/logo.png + +sources: + - https://github.com/michelangelo-ai/michelangelo + - https://github.com/michelangelo-ai/michelangelo/tree/main/helm/michelangelo + +keywords: + - machine-learning + - mlops + - pipelines + - workflow + - kubernetes-operator + - ray + - spark + +maintainers: + - name: Michelangelo Maintainers + email: maintainers@michelangelo-ai.org + url: https://github.com/michelangelo-ai/michelangelo + +annotations: + artifacthub.io/category: machine-learning + artifacthub.io/license: Apache-2.0 + artifacthub.io/prerelease: "true" + artifacthub.io/changes: | + - kind: added + description: Initial release of the michelangelo Helm chart + - kind: added + description: Per-service enabled toggles (apiserver, envoy, ui, worker, controllermgr) + - kind: added + description: Schema-init container ensures metadata storage is ready before apiserver starts + - kind: added + description: Cadence and Temporal workflow engines selectable via workflow.engine + artifacthub.io/links: | + - name: Documentation + url: https://github.com/michelangelo-ai/michelangelo/tree/main/docs + - name: Source + url: https://github.com/michelangelo-ai/michelangelo + - name: Issues + url: https://github.com/michelangelo-ai/michelangelo/issues + artifacthub.io/maintainers: | + - name: Michelangelo Maintainers + email: maintainers@michelangelo-ai.org + artifacthub.io/signKey: | + fingerprint: 0000000000000000000000000000000000000000 + url: https://github.com/michelangelo-ai/michelangelo/releases diff --git 
a/helm/michelangelo/README.md b/helm/michelangelo/README.md new file mode 100644 index 000000000..e0580f123 --- /dev/null +++ b/helm/michelangelo/README.md @@ -0,0 +1,178 @@ +# Michelangelo Helm Chart + +[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/michelangelo)](https://artifacthub.io/packages/helm/michelangelo/michelangelo) + +The `michelangelo` Helm chart installs the Michelangelo control plane — apiserver, gRPC-Web proxy (envoy), UI, workflow worker, controller manager, CRDs, and RBAC — into any Kubernetes cluster. + +The chart owns only the **control plane**. Infrastructure (metadata storage, object storage, workflow engine) is your responsibility — bring your own RDS / Cloud SQL, S3 / GCS, and Cadence / Temporal, or use the local development setup below. + +## Prerequisites + +| Requirement | Version | Notes | +|---|---|---| +| Kubernetes | 1.27+ | Tested on 1.27 – 1.30. CRDs use `apiextensions.k8s.io/v1`. | +| Helm | 3.12+ | Required for the `lookup` and `required` template functions used by the chart. | +| Metadata storage | MySQL 8.0 or PostgreSQL 14+ | The chart provisions schema via an init container; you provide a reachable host and root credentials. | +| Object storage | S3-compatible | S3, GCS (HMAC), MinIO, or any S3-API endpoint. The chart consumes `endpoint`, access key, and secret key. | +| Workflow engine | Cadence or Temporal | Reachable at a host:port from inside the cluster. | +| Optional: cluster operators | KubeRay, Spark Operator | Required only if your pipelines use Ray or Spark tasks. Install separately from their upstream charts. 
| + +## Quick Install + +### Local development (k3d) + +The Michelangelo CLI provisions a local k3d cluster, MySQL, MinIO, and Cadence, then installs this chart with `values-k3d.yaml`: + +```bash +pip install michelangelo +michelangelo sandbox up +``` + +To run `helm install` directly against an existing k3d cluster with infrastructure already up: + +```bash +helm install michelangelo ./helm/michelangelo -f ./helm/michelangelo/values-k3d.yaml +``` + +### Production + +```bash +helm install michelangelo ./helm/michelangelo \ + --namespace michelangelo --create-namespace \ + --set metadataStorage.host=my-rds.example.com \ + --set metadataStorage.rootPassword=$METADATA_ROOT_PASSWORD \ + --set objectStorage.endpoint=s3.amazonaws.com \ + --set objectStorage.accessKeyId=$AWS_ACCESS_KEY_ID \ + --set objectStorage.secretAccessKey=$AWS_SECRET_ACCESS_KEY \ + --set workflow.endpoint=temporal-frontend.temporal:7233 \ + --set workflow.engine=temporal +``` + +For repeatable installs, write a `values-prod.yaml` and pass it with `-f` instead of long `--set` chains. Never put credentials in a file you commit to git — use `--set` from a secrets manager, or pre-create the `object-storage-credentials` Secret in the release namespace and let Helm leave it alone (the chart marks it `helm.sh/resource-policy: keep`). + +## Values Reference + +Top-level keys. See [`values.yaml`](./values.yaml) for the full annotated schema. + +| Key | Type | Default | Required | Description | +|---|---|---|---|---| +| `metadataStorage.driver` | string | `mysql` | yes | `mysql` or `postgres`. Selects the schema-init image and JDBC dialect. | +| `metadataStorage.host` | string | `""` | **yes** | Hostname of metadata storage (e.g. `my-rds.example.com`). Install fails if empty. | +| `metadataStorage.port` | int | `3306` | no | Defaults to 3306 for MySQL, 5432 for Postgres. | +| `metadataStorage.database` | string | `michelangelo` | no | Database name; created by the schema-init container if it does not exist. 
| +| `metadataStorage.rootPassword` | string | `""` | **yes** | Root password used by schema-init and runtime services. | +| `objectStorage.endpoint` | string | `""` | **yes** | S3-compatible endpoint (e.g. `s3.amazonaws.com`, `minio:9000`). | +| `objectStorage.secure` | bool | `true` | no | TLS for object storage. Set `false` for in-cluster MinIO. | +| `objectStorage.region` | string | `us-east-1` | no | AWS region; used by S3, GCS HMAC, and MinIO. | +| `objectStorage.bucket` | string | `michelangelo` | no | Bucket name for artifacts, models, and logs. | +| `objectStorage.accessKeyId` | string | `""` | conditional | Required unless `objectStorage.existingSecret` is set. | +| `objectStorage.secretAccessKey` | string | `""` | conditional | Required unless `objectStorage.existingSecret` is set. | +| `objectStorage.existingSecret` | string | `""` | no | Name of a pre-existing Secret with `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` keys. Takes precedence over the inline keys. | +| `workflow.engine` | string | `cadence` | yes | `cadence` or `temporal`. Mutually exclusive — selects which worker config block renders. | +| `workflow.endpoint` | string | `""` | **yes** | Workflow engine address (e.g. `cadence-frontend:7833`, `temporal-frontend:7233`). | +| `workflow.domain` | string | `default` | no | Cadence domain or Temporal namespace. | +| `images.apiserver` | string | `ghcr.io/michelangelo-ai/apiserver:main` | no | Override to pin a version or use a private registry. | +| `images.worker` | string | `ghcr.io/michelangelo-ai/worker:main` | no | | +| `images.ui` | string | `ghcr.io/michelangelo-ai/ui:main` | no | | +| `images.controllermgr` | string | `ghcr.io/michelangelo-ai/controllermgr:main` | no | | +| `images.envoy` | string | `envoyproxy/envoy:v1.29-latest` | no | | +| `images.pullPolicy` | string | `IfNotPresent` | no | | +| `imagePullSecrets` | list | `[]` | no | List of Secret names for private registries. 
| +| `apiserver.enabled` | bool | `true` | no | Toggle the apiserver Deployment. | +| `apiserver.port` | int | `15566` | no | gRPC port. | +| `apiserver.service.type` | string | `ClusterIP` | no | `ClusterIP`, `NodePort`, or `LoadBalancer`. | +| `apiserver.service.nodePort` | int | `null` | no | Required when `service.type=NodePort`. | +| `envoy.enabled` | bool | `true` | no | Toggle the gRPC-Web proxy. | +| `envoy.port` | int | `8081` | no | | +| `envoy.corsOrigins` | string | `""` | no | Regex for `Access-Control-Allow-Origin`. Required if the UI runs on a different host. | +| `envoy.service.type` | string | `ClusterIP` | no | | +| `envoy.service.nodePort` | int | `null` | no | | +| `ui.enabled` | bool | `true` | no | Toggle the UI Deployment. | +| `ui.apiBaseUrl` | string | `""` | conditional | Browser-reachable URL of the envoy proxy. Required when `ui.enabled=true`. | +| `ui.service.type` | string | `ClusterIP` | no | | +| `ui.service.port` | int | `80` | no | | +| `ui.service.nodePort` | int | `null` | no | | +| `worker.enabled` | bool | `true` | no | | +| `worker.replicas` | int | `1` | no | Scale horizontally for higher pipeline-run throughput. | +| `controllermgr.enabled` | bool | `true` | no | | +| `controllermgr.watchNamespace` | list | `[]` | no | Namespaces the controller watches. Empty = all namespaces (ClusterRole). Set to a list to scope down to namespaced Roles. | +| `serviceAccount.create` | bool | `true` | no | Create a ServiceAccount for the chart. | +| `serviceAccount.name` | string | `""` | no | Override the generated ServiceAccount name. | +| `podSecurityContext` | object | see values.yaml | no | Applied to every Pod. | +| `securityContext` | object | see values.yaml | no | Applied to every container. | +| `resources` | object | see values.yaml | no | Default resource requests/limits applied per service. | +| `nodeSelector` | object | `{}` | no | Applied to every Pod. 
| +| `tolerations` | list | `[]` | no | | +| `affinity` | object | `{}` | no | | + +## Upgrade + +```bash +helm upgrade michelangelo ./helm/michelangelo --reuse-values +``` + +`--reuse-values` keeps your previous `--set` and `-f` overrides. Pass new flags only for the values you want to change. To pin to a specific image tag during upgrade: + +```bash +helm upgrade michelangelo ./helm/michelangelo --reuse-values \ + --set images.apiserver=ghcr.io/michelangelo-ai/apiserver:v0.3.1 \ + --set images.worker=ghcr.io/michelangelo-ai/worker:v0.3.1 \ + --set images.ui=ghcr.io/michelangelo-ai/ui:v0.3.1 \ + --set images.controllermgr=ghcr.io/michelangelo-ai/controllermgr:v0.3.1 +``` + +The `object-storage-credentials` Secret is annotated `helm.sh/resource-policy: keep` and will not be overwritten on upgrade. To rotate credentials, update the Secret in place with `kubectl edit secret object-storage-credentials` or delete and re-apply it. + +## Uninstall + +```bash +helm uninstall michelangelo --namespace michelangelo +``` + +This removes all chart-managed resources. **It does not remove**: + +- The `object-storage-credentials` Secret (intentional — protects against accidental credential loss). Delete manually with `kubectl delete secret object-storage-credentials -n michelangelo` if you want it gone. +- CRDs created by the chart (Helm does not delete CRDs by default to prevent data loss). To purge, run `kubectl delete crd -l app.kubernetes.io/managed-by=Helm,app.kubernetes.io/part-of=michelangelo`. +- Any data in your metadata or object storage — those live outside the chart. + +## Troubleshooting + +**The apiserver Pod is stuck in `Init:0/2` or `Init:1/2`.** + +One of the two init containers (`wait-for-metadata-storage`, then `schema-init`) has not completed against metadata storage. Check the logs, swapping in `-c wait-for-metadata-storage` if that container is the one still running: + +```bash +kubectl logs <apiserver-pod> -c schema-init +``` + +Common causes: wrong `metadataStorage.host`, wrong `rootPassword`, network policy blocking the connection, RDS security group not allowing the cluster's egress CIDR. 
+ +**The UI loads but shows "Failed to fetch" in the browser console.** + +The browser is hitting `ui.apiBaseUrl` directly — if envoy is on a ClusterIP Service the browser cannot reach it. Either expose envoy through a NodePort/LoadBalancer/Ingress and update `ui.apiBaseUrl`, or port-forward `svc/<fullname>-envoy` and set `ui.apiBaseUrl=http://localhost:8081`. + +**`helm install` fails with `metadataStorage.host is required`.** + +You did not provide `--set metadataStorage.host=...` or `-f values-k3d.yaml`. The chart fails fast on missing required values — see the full list in the values reference table above. + +**`helm install` fails with `workflow.endpoint is required`.** + +Same as above for the workflow engine. If installing against k3d, pass `-f helm/michelangelo/values-k3d.yaml` which sets `workflow.endpoint=cadence:7933`. + +**Worker logs `connection refused` to the workflow engine.** + +The Cadence/Temporal service is not reachable at `workflow.endpoint`. Verify with: + +```bash +kubectl run -it --rm debug --image=busybox --restart=Never -- nc -zv <workflow-host> <workflow-port> +``` + +If using Temporal, confirm `workflow.engine=temporal` (the worker renders different config blocks for each engine — wrong engine value produces silent connection failures). + +**Multiple installs collide on resource names.** + +Set distinct release names. All resources are prefixed with `{{ include "michelangelo.fullname" . }}` which incorporates `.Release.Name` — installs in different namespaces with the same release name will not collide on namespaced resources, but will collide on cluster-scoped ones (CRDs, ClusterRoles). + +## Contributing + +Issues and PRs welcome at https://github.com/michelangelo-ai/michelangelo. Run `helm lint ./helm/michelangelo` and `helm template ./helm/michelangelo -f helm/michelangelo/values-k3d.yaml` locally before submitting changes to chart files. 
diff --git a/helm/michelangelo/crds/.gitkeep b/helm/michelangelo/crds/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/helm/michelangelo/crds/README.md b/helm/michelangelo/crds/README.md new file mode 100644 index 000000000..b6c350e58 --- /dev/null +++ b/helm/michelangelo/crds/README.md @@ -0,0 +1,12 @@ +# CRDs + +This directory is intentionally empty. + +Michelangelo's CRDs are registered (and updated) by the apiserver at startup +through its `crdSync.enableCRDUpdate: true` config. The apiserver calls +`crd.SyncCRDs()` (`go/api/crd/sync.go`) which generates each CustomResource +Definition from the registered protobuf types in `proto-go/api/v2/*` and +applies them via the Kubernetes API. + +If a future change ships static CRD YAML, drop the manifests in this directory +and Helm will install them before any chart templates render. diff --git a/helm/michelangelo/files/schema/mysql-init-schema.sql b/helm/michelangelo/files/schema/mysql-init-schema.sql new file mode 100644 index 000000000..d7fd0178b --- /dev/null +++ b/helm/michelangelo/files/schema/mysql-init-schema.sql @@ -0,0 +1,578 @@ +-- ============================================================================== +-- Michelangelo Ingester - Complete Database Schema +-- ============================================================================== +-- This schema includes ALL 13 CRDs watched by the ingester +-- Safe for production and sandbox (idempotent with IF NOT EXISTS) +-- Generated based on protobuf GetIndexedKeyValuePairs() methods +-- ============================================================================== + +CREATE DATABASE IF NOT EXISTS michelangelo; +USE michelangelo; + +-- ============================================================================== +-- 1. 
MODEL +-- ============================================================================== +CREATE TABLE IF NOT EXISTS `model` ( + `uid` VARCHAR(255) NOT NULL, + `group_ver` VARCHAR(255) NOT NULL, + `namespace` VARCHAR(255) NOT NULL, + `name` VARCHAR(255) NOT NULL, + `res_version` BIGINT UNSIGNED NOT NULL, + `create_time` DATETIME NOT NULL, + `update_time` DATETIME, + `delete_time` DATETIME, + `proto` MEDIUMBLOB, + `json` JSON, + `algorithm` VARCHAR(255), + `training_framework` VARCHAR(255), + `owner` VARCHAR(255), + `source` VARCHAR(255), + `description` VARCHAR(768), + `model_kind` VARCHAR(255), + `package_type` VARCHAR(255), + `revision_id` VARCHAR(255), + `src_pipeline_run_namespace` VARCHAR(255), + `src_pipeline_run_name` VARCHAR(255), + `model_family_namespace` VARCHAR(255), + `model_family_name` VARCHAR(255), + `feature_eval_report_namespace` VARCHAR(255), + `feature_eval_report_name` VARCHAR(255), + `performance_eval_report_namespace` VARCHAR(255), + `performance_eval_report_name` VARCHAR(255), + `feature_quality_report_namespace` VARCHAR(255), + `feature_quality_report_name` VARCHAR(255), + `explainability_report_namespace` VARCHAR(255), + `explainability_report_name` VARCHAR(255), + PRIMARY KEY (`uid`), + KEY `model_namespace_name` (`namespace`, `name`), + KEY `model_create_time` (`create_time`), + KEY `model_algorithm` (`algorithm`), + KEY `model_owner` (`owner`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +CREATE TABLE IF NOT EXISTS `model_labels` ( + `id` BIGINT NOT NULL AUTO_INCREMENT, + `obj_uid` VARCHAR(255) NOT NULL, + `key` VARCHAR(255) NOT NULL, + `value` VARCHAR(63), + PRIMARY KEY (`id`), + KEY `model_labels_uid` (`obj_uid`), + KEY `model_labels_key_value` (`key`, `value`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +CREATE TABLE IF NOT EXISTS `model_annotations` ( + `id` BIGINT NOT NULL AUTO_INCREMENT, + `obj_uid` VARCHAR(255) NOT NULL, + `key` VARCHAR(255) NOT NULL, + `value` TEXT, + PRIMARY 
KEY (`id`), + KEY `model_annotations_uid` (`obj_uid`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +-- ============================================================================== +-- 2. MODEL_FAMILY +-- ============================================================================== +CREATE TABLE IF NOT EXISTS `modelfamily` ( + `uid` VARCHAR(255) NOT NULL, + `group_ver` VARCHAR(255) NOT NULL, + `namespace` VARCHAR(255) NOT NULL, + `name` VARCHAR(255) NOT NULL, + `res_version` BIGINT UNSIGNED NOT NULL, + `create_time` DATETIME NOT NULL, + `update_time` DATETIME, + `delete_time` DATETIME, + `proto` MEDIUMBLOB, + `json` JSON, + `model_family_name` VARCHAR(255), + PRIMARY KEY (`uid`), + KEY `modelfamily_namespace_name` (`namespace`, `name`), + KEY `modelfamily_create_time` (`create_time`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +CREATE TABLE IF NOT EXISTS `modelfamily_labels` ( + `id` BIGINT NOT NULL AUTO_INCREMENT, + `obj_uid` VARCHAR(255) NOT NULL, + `key` VARCHAR(255) NOT NULL, + `value` VARCHAR(63), + PRIMARY KEY (`id`), + KEY `modelfamily_labels_uid` (`obj_uid`), + KEY `modelfamily_labels_key_value` (`key`, `value`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +CREATE TABLE IF NOT EXISTS `modelfamily_annotations` ( + `id` BIGINT NOT NULL AUTO_INCREMENT, + `obj_uid` VARCHAR(255) NOT NULL, + `key` VARCHAR(255) NOT NULL, + `value` TEXT, + PRIMARY KEY (`id`), + KEY `modelfamily_annotations_uid` (`obj_uid`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +-- ============================================================================== +-- 3. 
PIPELINE +-- ============================================================================== +CREATE TABLE IF NOT EXISTS `pipeline` ( + `uid` VARCHAR(255) NOT NULL, + `group_ver` VARCHAR(255) NOT NULL, + `namespace` VARCHAR(255) NOT NULL, + `name` VARCHAR(255) NOT NULL, + `res_version` BIGINT UNSIGNED NOT NULL, + `create_time` DATETIME NOT NULL, + `update_time` DATETIME, + `delete_time` DATETIME, + `proto` MEDIUMBLOB, + `json` JSON, + `owner` VARCHAR(255), + `pipeline_type` VARCHAR(255), + PRIMARY KEY (`uid`), + KEY `pipeline_namespace_name` (`namespace`, `name`), + KEY `pipeline_create_time` (`create_time`), + KEY `pipeline_owner` (`owner`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +CREATE TABLE IF NOT EXISTS `pipeline_labels` ( + `id` BIGINT NOT NULL AUTO_INCREMENT, + `obj_uid` VARCHAR(255) NOT NULL, + `key` VARCHAR(255) NOT NULL, + `value` VARCHAR(63), + PRIMARY KEY (`id`), + KEY `pipeline_labels_uid` (`obj_uid`), + KEY `pipeline_labels_key_value` (`key`, `value`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +CREATE TABLE IF NOT EXISTS `pipeline_annotations` ( + `id` BIGINT NOT NULL AUTO_INCREMENT, + `obj_uid` VARCHAR(255) NOT NULL, + `key` VARCHAR(255) NOT NULL, + `value` TEXT, + PRIMARY KEY (`id`), + KEY `pipeline_annotations_uid` (`obj_uid`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +-- ============================================================================== +-- 4. 
PIPELINE_RUN +-- ============================================================================== +CREATE TABLE IF NOT EXISTS `pipelinerun` ( + `uid` VARCHAR(255) NOT NULL, + `group_ver` VARCHAR(255) NOT NULL, + `namespace` VARCHAR(255) NOT NULL, + `name` VARCHAR(255) NOT NULL, + `res_version` BIGINT UNSIGNED NOT NULL, + `create_time` DATETIME NOT NULL, + `update_time` DATETIME, + `delete_time` DATETIME, + `proto` MEDIUMBLOB, + `json` JSON, + `pipeline_namespace` VARCHAR(255), + `pipeline_name` VARCHAR(255), + `revision_namespace` VARCHAR(255), + `revision_name` VARCHAR(255), + `resume_pipeline_run_namespace` VARCHAR(255), + `resume_pipeline_run_name` VARCHAR(255), + `state` VARCHAR(255), + `actor` VARCHAR(255), + `end_time` DATETIME, + `exception_type` VARCHAR(255), + PRIMARY KEY (`uid`), + KEY `pipelinerun_namespace_name` (`namespace`, `name`), + KEY `pipelinerun_create_time` (`create_time`), + KEY `pipelinerun_pipeline` (`pipeline_namespace`, `pipeline_name`), + KEY `pipelinerun_state` (`state`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +CREATE TABLE IF NOT EXISTS `pipelinerun_labels` ( + `id` BIGINT NOT NULL AUTO_INCREMENT, + `obj_uid` VARCHAR(255) NOT NULL, + `key` VARCHAR(255) NOT NULL, + `value` VARCHAR(63), + PRIMARY KEY (`id`), + KEY `pipelinerun_labels_uid` (`obj_uid`), + KEY `pipelinerun_labels_key_value` (`key`, `value`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +CREATE TABLE IF NOT EXISTS `pipelinerun_annotations` ( + `id` BIGINT NOT NULL AUTO_INCREMENT, + `obj_uid` VARCHAR(255) NOT NULL, + `key` VARCHAR(255) NOT NULL, + `value` TEXT, + PRIMARY KEY (`id`), + KEY `pipelinerun_annotations_uid` (`obj_uid`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +-- ============================================================================== +-- 5. 
DEPLOYMENT +-- ============================================================================== +CREATE TABLE IF NOT EXISTS `deployment` ( + `uid` VARCHAR(255) NOT NULL, + `group_ver` VARCHAR(255) NOT NULL, + `namespace` VARCHAR(255) NOT NULL, + `name` VARCHAR(255) NOT NULL, + `res_version` BIGINT UNSIGNED NOT NULL, + `create_time` DATETIME NOT NULL, + `update_time` DATETIME, + `delete_time` DATETIME, + `proto` MEDIUMBLOB, + `json` JSON, + `state` VARCHAR(255), + `target_definition_type` VARCHAR(255), + `current_revision_namespace` VARCHAR(255), + `current_revision_name` VARCHAR(255), + `deletion_requested_timestamp` DATETIME, + PRIMARY KEY (`uid`), + KEY `deployment_namespace_name` (`namespace`, `name`), + KEY `deployment_create_time` (`create_time`), + KEY `deployment_state` (`state`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +CREATE TABLE IF NOT EXISTS `deployment_labels` ( + `id` BIGINT NOT NULL AUTO_INCREMENT, + `obj_uid` VARCHAR(255) NOT NULL, + `key` VARCHAR(255) NOT NULL, + `value` VARCHAR(63), + PRIMARY KEY (`id`), + KEY `deployment_labels_uid` (`obj_uid`), + KEY `deployment_labels_key_value` (`key`, `value`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +CREATE TABLE IF NOT EXISTS `deployment_annotations` ( + `id` BIGINT NOT NULL AUTO_INCREMENT, + `obj_uid` VARCHAR(255) NOT NULL, + `key` VARCHAR(255) NOT NULL, + `value` TEXT, + PRIMARY KEY (`id`), + KEY `deployment_annotations_uid` (`obj_uid`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +-- ============================================================================== +-- 6. 
INFERENCE_SERVER +-- ============================================================================== +CREATE TABLE IF NOT EXISTS `inferenceserver` ( + `uid` VARCHAR(255) NOT NULL, + `group_ver` VARCHAR(255) NOT NULL, + `namespace` VARCHAR(255) NOT NULL, + `name` VARCHAR(255) NOT NULL, + `res_version` BIGINT UNSIGNED NOT NULL, + `create_time` DATETIME NOT NULL, + `update_time` DATETIME, + `delete_time` DATETIME, + `proto` MEDIUMBLOB, + `json` JSON, + `state` VARCHAR(255), + PRIMARY KEY (`uid`), + KEY `inferenceserver_namespace_name` (`namespace`, `name`), + KEY `inferenceserver_create_time` (`create_time`), + KEY `inferenceserver_state` (`state`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +CREATE TABLE IF NOT EXISTS `inferenceserver_labels` ( + `id` BIGINT NOT NULL AUTO_INCREMENT, + `obj_uid` VARCHAR(255) NOT NULL, + `key` VARCHAR(255) NOT NULL, + `value` VARCHAR(63), + PRIMARY KEY (`id`), + KEY `inferenceserver_labels_uid` (`obj_uid`), + KEY `inferenceserver_labels_key_value` (`key`, `value`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +CREATE TABLE IF NOT EXISTS `inferenceserver_annotations` ( + `id` BIGINT NOT NULL AUTO_INCREMENT, + `obj_uid` VARCHAR(255) NOT NULL, + `key` VARCHAR(255) NOT NULL, + `value` TEXT, + PRIMARY KEY (`id`), + KEY `inferenceserver_annotations_uid` (`obj_uid`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +-- ============================================================================== +-- 7. 
PROJECT +-- ============================================================================== +CREATE TABLE IF NOT EXISTS `project` ( + `uid` VARCHAR(255) NOT NULL, + `group_ver` VARCHAR(255) NOT NULL, + `namespace` VARCHAR(255) NOT NULL, + `name` VARCHAR(255) NOT NULL, + `res_version` BIGINT UNSIGNED NOT NULL, + `create_time` DATETIME NOT NULL, + `update_time` DATETIME, + `delete_time` DATETIME, + `proto` MEDIUMBLOB, + `json` JSON, + `tier` VARCHAR(255), + PRIMARY KEY (`uid`), + KEY `project_namespace_name` (`namespace`, `name`), + KEY `project_create_time` (`create_time`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +CREATE TABLE IF NOT EXISTS `project_labels` ( + `id` BIGINT NOT NULL AUTO_INCREMENT, + `obj_uid` VARCHAR(255) NOT NULL, + `key` VARCHAR(255) NOT NULL, + `value` VARCHAR(63), + PRIMARY KEY (`id`), + KEY `project_labels_uid` (`obj_uid`), + KEY `project_labels_key_value` (`key`, `value`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +CREATE TABLE IF NOT EXISTS `project_annotations` ( + `id` BIGINT NOT NULL AUTO_INCREMENT, + `obj_uid` VARCHAR(255) NOT NULL, + `key` VARCHAR(255) NOT NULL, + `value` TEXT, + PRIMARY KEY (`id`), + KEY `project_annotations_uid` (`obj_uid`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +-- ============================================================================== +-- 8. 
REVISION +-- ============================================================================== +-- Main object table, keyed by `uid`; IF NOT EXISTS leaves an already-created table untouched on re-run. +-- Extra columns: `base_resource_namespace`, `base_resource_name`, `base_type`, `commit_branch`, `git_ref`, `owner`. +-- Of those, only the (`base_resource_namespace`, `base_resource_name`) pair is indexed. +CREATE TABLE IF NOT EXISTS `revision` ( + `uid` VARCHAR(255) NOT NULL, + `group_ver` VARCHAR(255) NOT NULL, + `namespace` VARCHAR(255) NOT NULL, + `name` VARCHAR(255) NOT NULL, + `res_version` BIGINT UNSIGNED NOT NULL, + `create_time` DATETIME NOT NULL, + `update_time` DATETIME, + `delete_time` DATETIME, + `proto` MEDIUMBLOB, + `json` JSON, + `base_resource_namespace` VARCHAR(255), + `base_resource_name` VARCHAR(255), + `base_type` VARCHAR(255), + `commit_branch` VARCHAR(255), + `git_ref` VARCHAR(255), + `owner` VARCHAR(255), + PRIMARY KEY (`uid`), + KEY `revision_namespace_name` (`namespace`, `name`), + KEY `revision_create_time` (`create_time`), + KEY `revision_base_resource` (`base_resource_namespace`, `base_resource_name`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +-- Label rows: one `key`/`value` pair per row, indexed by `obj_uid` and by (`key`, `value`). +-- NOTE(review): `obj_uid` presumably points at `revision`.`uid`, but no FOREIGN KEY is declared - confirm who removes orphaned rows. +CREATE TABLE IF NOT EXISTS `revision_labels` ( + `id` BIGINT NOT NULL AUTO_INCREMENT, + `obj_uid` VARCHAR(255) NOT NULL, + `key` VARCHAR(255) NOT NULL, + `value` VARCHAR(63), + PRIMARY KEY (`id`), + KEY `revision_labels_uid` (`obj_uid`), + KEY `revision_labels_key_value` (`key`, `value`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +-- Annotation rows: same layout as the labels table, except `value` is TEXT and only `obj_uid` is indexed. +CREATE TABLE IF NOT EXISTS `revision_annotations` ( + `id` BIGINT NOT NULL AUTO_INCREMENT, + `obj_uid` VARCHAR(255) NOT NULL, + `key` VARCHAR(255) NOT NULL, + `value` TEXT, + PRIMARY KEY (`id`), + KEY `revision_annotations_uid` (`obj_uid`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +-- ============================================================================== +-- 9. 
CLUSTER +-- ============================================================================== +-- Main object table, keyed by `uid`; IF NOT EXISTS leaves an already-created table untouched on re-run. +-- Carries only the columns shared with the other object tables (no type-specific columns). +CREATE TABLE IF NOT EXISTS `cluster` ( + `uid` VARCHAR(255) NOT NULL, + `group_ver` VARCHAR(255) NOT NULL, + `namespace` VARCHAR(255) NOT NULL, + `name` VARCHAR(255) NOT NULL, + `res_version` BIGINT UNSIGNED NOT NULL, + `create_time` DATETIME NOT NULL, + `update_time` DATETIME, + `delete_time` DATETIME, + `proto` MEDIUMBLOB, + `json` JSON, + PRIMARY KEY (`uid`), + KEY `cluster_namespace_name` (`namespace`, `name`), + KEY `cluster_create_time` (`create_time`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +-- Label rows: one `key`/`value` pair per row, indexed by `obj_uid` and by (`key`, `value`). +-- NOTE(review): `obj_uid` presumably points at `cluster`.`uid`, but no FOREIGN KEY is declared - confirm who removes orphaned rows. +CREATE TABLE IF NOT EXISTS `cluster_labels` ( + `id` BIGINT NOT NULL AUTO_INCREMENT, + `obj_uid` VARCHAR(255) NOT NULL, + `key` VARCHAR(255) NOT NULL, + `value` VARCHAR(63), + PRIMARY KEY (`id`), + KEY `cluster_labels_uid` (`obj_uid`), + KEY `cluster_labels_key_value` (`key`, `value`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +-- Annotation rows: same layout as the labels table, except `value` is TEXT and only `obj_uid` is indexed. +CREATE TABLE IF NOT EXISTS `cluster_annotations` ( + `id` BIGINT NOT NULL AUTO_INCREMENT, + `obj_uid` VARCHAR(255) NOT NULL, + `key` VARCHAR(255) NOT NULL, + `value` TEXT, + PRIMARY KEY (`id`), + KEY `cluster_annotations_uid` (`obj_uid`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +-- ============================================================================== +-- 10. 
RAY_CLUSTER +-- ============================================================================== +-- Main object table, keyed by `uid`; IF NOT EXISTS leaves an already-created table untouched on re-run. +-- Carries only the columns shared with the other object tables (no type-specific columns). +CREATE TABLE IF NOT EXISTS `raycluster` ( + `uid` VARCHAR(255) NOT NULL, + `group_ver` VARCHAR(255) NOT NULL, + `namespace` VARCHAR(255) NOT NULL, + `name` VARCHAR(255) NOT NULL, + `res_version` BIGINT UNSIGNED NOT NULL, + `create_time` DATETIME NOT NULL, + `update_time` DATETIME, + `delete_time` DATETIME, + `proto` MEDIUMBLOB, + `json` JSON, + PRIMARY KEY (`uid`), + KEY `raycluster_namespace_name` (`namespace`, `name`), + KEY `raycluster_create_time` (`create_time`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +-- Label rows: one `key`/`value` pair per row, indexed by `obj_uid` and by (`key`, `value`). +-- NOTE(review): `obj_uid` presumably points at `raycluster`.`uid`, but no FOREIGN KEY is declared - confirm who removes orphaned rows. +CREATE TABLE IF NOT EXISTS `raycluster_labels` ( + `id` BIGINT NOT NULL AUTO_INCREMENT, + `obj_uid` VARCHAR(255) NOT NULL, + `key` VARCHAR(255) NOT NULL, + `value` VARCHAR(63), + PRIMARY KEY (`id`), + KEY `raycluster_labels_uid` (`obj_uid`), + KEY `raycluster_labels_key_value` (`key`, `value`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +-- Annotation rows: same layout as the labels table, except `value` is TEXT and only `obj_uid` is indexed. +CREATE TABLE IF NOT EXISTS `raycluster_annotations` ( + `id` BIGINT NOT NULL AUTO_INCREMENT, + `obj_uid` VARCHAR(255) NOT NULL, + `key` VARCHAR(255) NOT NULL, + `value` TEXT, + PRIMARY KEY (`id`), + KEY `raycluster_annotations_uid` (`obj_uid`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +-- ============================================================================== +-- 11. 
RAY_JOB +-- ============================================================================== +-- Main object table, keyed by `uid`; IF NOT EXISTS leaves an already-created table untouched on re-run. +-- Carries only the columns shared with the other object tables (no type-specific columns). +CREATE TABLE IF NOT EXISTS `rayjob` ( + `uid` VARCHAR(255) NOT NULL, + `group_ver` VARCHAR(255) NOT NULL, + `namespace` VARCHAR(255) NOT NULL, + `name` VARCHAR(255) NOT NULL, + `res_version` BIGINT UNSIGNED NOT NULL, + `create_time` DATETIME NOT NULL, + `update_time` DATETIME, + `delete_time` DATETIME, + `proto` MEDIUMBLOB, + `json` JSON, + PRIMARY KEY (`uid`), + KEY `rayjob_namespace_name` (`namespace`, `name`), + KEY `rayjob_create_time` (`create_time`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +-- Label rows: one `key`/`value` pair per row, indexed by `obj_uid` and by (`key`, `value`). +-- NOTE(review): `obj_uid` presumably points at `rayjob`.`uid`, but no FOREIGN KEY is declared - confirm who removes orphaned rows. +CREATE TABLE IF NOT EXISTS `rayjob_labels` ( + `id` BIGINT NOT NULL AUTO_INCREMENT, + `obj_uid` VARCHAR(255) NOT NULL, + `key` VARCHAR(255) NOT NULL, + `value` VARCHAR(63), + PRIMARY KEY (`id`), + KEY `rayjob_labels_uid` (`obj_uid`), + KEY `rayjob_labels_key_value` (`key`, `value`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +-- Annotation rows: same layout as the labels table, except `value` is TEXT and only `obj_uid` is indexed. +CREATE TABLE IF NOT EXISTS `rayjob_annotations` ( + `id` BIGINT NOT NULL AUTO_INCREMENT, + `obj_uid` VARCHAR(255) NOT NULL, + `key` VARCHAR(255) NOT NULL, + `value` TEXT, + PRIMARY KEY (`id`), + KEY `rayjob_annotations_uid` (`obj_uid`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +-- ============================================================================== +-- 12. 
SPARK_JOB +-- ============================================================================== +-- Main object table, keyed by `uid`; IF NOT EXISTS leaves an already-created table untouched on re-run. +-- Carries only the columns shared with the other object tables (no type-specific columns). +CREATE TABLE IF NOT EXISTS `sparkjob` ( + `uid` VARCHAR(255) NOT NULL, + `group_ver` VARCHAR(255) NOT NULL, + `namespace` VARCHAR(255) NOT NULL, + `name` VARCHAR(255) NOT NULL, + `res_version` BIGINT UNSIGNED NOT NULL, + `create_time` DATETIME NOT NULL, + `update_time` DATETIME, + `delete_time` DATETIME, + `proto` MEDIUMBLOB, + `json` JSON, + PRIMARY KEY (`uid`), + KEY `sparkjob_namespace_name` (`namespace`, `name`), + KEY `sparkjob_create_time` (`create_time`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +-- Label rows: one `key`/`value` pair per row, indexed by `obj_uid` and by (`key`, `value`). +-- NOTE(review): `obj_uid` presumably points at `sparkjob`.`uid`, but no FOREIGN KEY is declared - confirm who removes orphaned rows. +CREATE TABLE IF NOT EXISTS `sparkjob_labels` ( + `id` BIGINT NOT NULL AUTO_INCREMENT, + `obj_uid` VARCHAR(255) NOT NULL, + `key` VARCHAR(255) NOT NULL, + `value` VARCHAR(63), + PRIMARY KEY (`id`), + KEY `sparkjob_labels_uid` (`obj_uid`), + KEY `sparkjob_labels_key_value` (`key`, `value`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +-- Annotation rows: same layout as the labels table, except `value` is TEXT and only `obj_uid` is indexed. +CREATE TABLE IF NOT EXISTS `sparkjob_annotations` ( + `id` BIGINT NOT NULL AUTO_INCREMENT, + `obj_uid` VARCHAR(255) NOT NULL, + `key` VARCHAR(255) NOT NULL, + `value` TEXT, + PRIMARY KEY (`id`), + KEY `sparkjob_annotations_uid` (`obj_uid`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +-- ============================================================================== +-- 13. 
TRIGGER_RUN +-- ============================================================================== +-- Main object table, keyed by `uid`; IF NOT EXISTS leaves an already-created table untouched on re-run. +-- Extra columns: `pipeline_namespace`, `pipeline_name`, `revision_namespace`, `revision_name`, `state`, `auto_flip`. +-- Indexed among those: the (`pipeline_namespace`, `pipeline_name`) pair and `state`; `auto_flip` is stored as VARCHAR(255). +CREATE TABLE IF NOT EXISTS `triggerrun` ( + `uid` VARCHAR(255) NOT NULL, + `group_ver` VARCHAR(255) NOT NULL, + `namespace` VARCHAR(255) NOT NULL, + `name` VARCHAR(255) NOT NULL, + `res_version` BIGINT UNSIGNED NOT NULL, + `create_time` DATETIME NOT NULL, + `update_time` DATETIME, + `delete_time` DATETIME, + `proto` MEDIUMBLOB, + `json` JSON, + `pipeline_namespace` VARCHAR(255), + `pipeline_name` VARCHAR(255), + `revision_namespace` VARCHAR(255), + `revision_name` VARCHAR(255), + `state` VARCHAR(255), + `auto_flip` VARCHAR(255), + PRIMARY KEY (`uid`), + KEY `triggerrun_namespace_name` (`namespace`, `name`), + KEY `triggerrun_create_time` (`create_time`), + KEY `triggerrun_pipeline` (`pipeline_namespace`, `pipeline_name`), + KEY `triggerrun_state` (`state`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +-- Label rows: one `key`/`value` pair per row, indexed by `obj_uid` and by (`key`, `value`). +-- NOTE(review): `obj_uid` presumably points at `triggerrun`.`uid`, but no FOREIGN KEY is declared - confirm who removes orphaned rows. +CREATE TABLE IF NOT EXISTS `triggerrun_labels` ( + `id` BIGINT NOT NULL AUTO_INCREMENT, + `obj_uid` VARCHAR(255) NOT NULL, + `key` VARCHAR(255) NOT NULL, + `value` VARCHAR(63), + PRIMARY KEY (`id`), + KEY `triggerrun_labels_uid` (`obj_uid`), + KEY `triggerrun_labels_key_value` (`key`, `value`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +-- Annotation rows: same layout as the labels table, except `value` is TEXT and only `obj_uid` is indexed. +CREATE TABLE IF NOT EXISTS `triggerrun_annotations` ( + `id` BIGINT NOT NULL AUTO_INCREMENT, + `obj_uid` VARCHAR(255) NOT NULL, + `key` VARCHAR(255) NOT NULL, + `value` TEXT, + PRIMARY KEY (`id`), + KEY `triggerrun_annotations_uid` (`obj_uid`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +-- ============================================================================== +-- SUMMARY +-- ============================================================================== +SELECT 'Complete schema initialization finished!' 
as status; +SELECT COUNT(*) as table_count FROM information_schema.tables +WHERE table_schema = 'michelangelo'; + +SELECT table_name, table_rows +FROM information_schema.tables +WHERE table_schema = 'michelangelo' +ORDER BY table_name; diff --git a/helm/michelangelo/templates/NOTES.txt b/helm/michelangelo/templates/NOTES.txt new file mode 100644 index 000000000..9237555c7 --- /dev/null +++ b/helm/michelangelo/templates/NOTES.txt @@ -0,0 +1,90 @@ +Michelangelo {{ .Chart.AppVersion }} is installing into namespace "{{ .Release.Namespace }}" as release "{{ .Release.Name }}". + +Services enabled in this release: +{{- if .Values.apiserver.enabled }} + - apiserver (gRPC on port {{ .Values.apiserver.port }}) +{{- end }} +{{- if .Values.envoy.enabled }} + - envoy (gRPC-Web proxy on port {{ .Values.envoy.port }}) +{{- end }} +{{- if .Values.ui.enabled }} + - ui (React frontend on port {{ .Values.ui.service.port }}) +{{- end }} +{{- if .Values.worker.enabled }} + - worker (workflow client → {{ .Values.workflow.engine }} at {{ .Values.workflow.endpoint }}) +{{- end }} +{{- if .Values.controllermgr.enabled }} + - controllermgr (Kubernetes controller manager) +{{- end }} + +──────────────────────────────────────────────────────────────────────── +1. Verify the install +──────────────────────────────────────────────────────────────────────── + + kubectl --namespace {{ .Release.Namespace }} get pods -l "app.kubernetes.io/instance={{ .Release.Name }}" + +All pods should reach the Running state within ~60 seconds. The apiserver +Pod first runs a `wait-for-metadata-storage` init container that waits for {{ .Values.metadataStorage.driver }} +at "{{ .Values.metadataStorage.host }}:{{ .Values.metadataStorage.port }}", then a `schema-init` init container that applies the schema — +if the Pod stays in Init:0/2, your metadata storage is unreachable. + +──────────────────────────────────────────────────────────────────────── +2. 
Open the UI +──────────────────────────────────────────────────────────────────────── + +{{- if .Values.ui.enabled }} +{{- if eq .Values.ui.service.type "NodePort" }} + + This release exposes the UI on NodePort {{ .Values.ui.service.nodePort }}. + If your cluster publishes node ports to localhost (e.g. k3d with --port), + open: + + http://localhost:{{ .Values.ui.service.nodePort }} + +{{- else if eq .Values.ui.service.type "LoadBalancer" }} + + This release exposes the UI through a LoadBalancer Service. Get the URL with: + + kubectl --namespace {{ .Release.Namespace }} get svc {{ include "michelangelo.fullname" . }}-ui \ + -o jsonpath='{.status.loadBalancer.ingress[0].hostname}{.status.loadBalancer.ingress[0].ip}' + + The address may take a minute to provision. If the output is still empty, fall back to port-forward. + +{{- else }} + + This release uses a ClusterIP Service for the UI (production-safe — no external exposure). + Reach the UI from your workstation with port-forward: + + kubectl --namespace {{ .Release.Namespace }} port-forward svc/{{ include "michelangelo.fullname" . }}-ui 8090:{{ .Values.ui.service.port }} + + Then open http://localhost:8090 + +{{- end }} +{{- else }} + + The UI is disabled in this release (ui.enabled=false). Re-enable with: + + helm upgrade {{ .Release.Name }} michelangelo/michelangelo --namespace {{ .Release.Namespace }} --reuse-values --set ui.enabled=true + +{{- end }} + +──────────────────────────────────────────────────────────────────────── +3. Run your first pipeline +──────────────────────────────────────────────────────────────────────── + + pip install michelangelo + michelangelo project create my-first-project + michelangelo pipeline run examples/hello_world.py + +Or open the UI and click "New Pipeline" to start from a template. + +──────────────────────────────────────────────────────────────────────── +4. 
Documentation & support +──────────────────────────────────────────────────────────────────────── + + Docs: https://github.com/michelangelo-ai/michelangelo/tree/main/docs + Issues: https://github.com/michelangelo-ai/michelangelo/issues + Chart: https://github.com/michelangelo-ai/michelangelo/tree/main/helm/michelangelo + +To upgrade: helm upgrade {{ .Release.Name }} michelangelo/michelangelo --namespace {{ .Release.Namespace }} --reuse-values +To uninstall: helm uninstall {{ .Release.Name }} --namespace {{ .Release.Namespace }} diff --git a/helm/michelangelo/templates/_helpers.tpl b/helm/michelangelo/templates/_helpers.tpl new file mode 100644 index 000000000..fb577b8e3 --- /dev/null +++ b/helm/michelangelo/templates/_helpers.tpl @@ -0,0 +1,107 @@ +{{/* +Expand the name of the chart. +Uses .Values.nameOverride when it is set, otherwise .Chart.Name; the result is +truncated to 63 characters and a trailing "-" is removed. +*/}} +{{- define "michelangelo.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this +(by the DNS naming spec). If release name contains chart name it will be used +as a full name. +Precedence: .Values.fullnameOverride, then the release name alone (when it already +contains the chart name or nameOverride), otherwise "[release]-[name]". +*/}} +{{- define "michelangelo.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Create chart name and version as used by the chart label. +Any "+" in the version is replaced with "_" before the value is truncated to 63 characters. +*/}} +{{- define "michelangelo.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Common labels applied to every resource. +Built from the chart label, the selector labels, the app version (only when +.Chart.AppVersion is set), managed-by and part-of. +*/}} +{{- define "michelangelo.labels" -}} +helm.sh/chart: {{ include "michelangelo.chart" . }} +{{ include "michelangelo.selectorLabels" . 
}} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +app.kubernetes.io/part-of: michelangelo +{{- end -}} + +{{/* +Selector labels (must be invariant across upgrades — do NOT include version +labels here, otherwise Deployment selectors break on chart bump). +Emits app.kubernetes.io/name (chart name helper) and app.kubernetes.io/instance (release name). +*/}} +{{- define "michelangelo.selectorLabels" -}} +app.kubernetes.io/name: {{ include "michelangelo.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end -}} + +{{/* +Per-component selector labels (used by per-Deployment selectors). +Takes a dict: "context" is the chart root scope, "component" is the component name; +the selector labels are extended with app.kubernetes.io/component. +Usage: {{ include "michelangelo.componentSelectorLabels" (dict "context" . "component" "apiserver") }} +*/}} +{{- define "michelangelo.componentSelectorLabels" -}} +{{ include "michelangelo.selectorLabels" .context }} +app.kubernetes.io/component: {{ .component }} +{{- end -}} + +{{/* +Per-component full labels (used on metadata of per-Deployment objects). +Takes the same dict ("context", "component"); the common labels are extended with +app.kubernetes.io/component. +Usage: {{ include "michelangelo.componentLabels" (dict "context" . "component" "apiserver") }} +*/}} +{{- define "michelangelo.componentLabels" -}} +{{ include "michelangelo.labels" .context }} +app.kubernetes.io/component: {{ .component }} +{{- end -}} + +{{/* +Per-component fullname (release-scoped, prevents cross-namespace collisions). +Usage: {{ include "michelangelo.componentFullname" (dict "context" . "component" "apiserver") }} +Output: [fullname]-[component], e.g. "michelangelo-apiserver" +NOTE(review): trunc 63 runs after the component suffix is appended, so with a long +fullname two different components can truncate to the same name. +*/}} +{{- define "michelangelo.componentFullname" -}} +{{- printf "%s-%s" (include "michelangelo.fullname" .context) .component | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +ServiceAccount name to use. +With serviceAccount.create=true the name falls back to the chart fullname; otherwise it +falls back to "default". A non-empty serviceAccount.name wins in both cases. +*/}} +{{- define "michelangelo.serviceAccountName" -}} +{{- if .Values.serviceAccount.create -}} +{{- default (include "michelangelo.fullname" .) 
.Values.serviceAccount.name -}} +{{- else -}} +{{- default "default" .Values.serviceAccount.name -}} +{{- end -}} +{{- end -}} + +{{/* +Object storage credentials Secret name (release-scoped so multiple installs +in different namespaces don't collide; matches design §"Credentials Idempotency"). +NOTE(review): trunc 63 runs after the 27-character suffix is appended, so a fullname +longer than 36 characters loses part of "-object-storage-credentials". +*/}} +{{- define "michelangelo.objectStorageSecretName" -}} +{{- printf "%s-object-storage-credentials" (include "michelangelo.fullname" .) | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Schema-init ConfigMap name (used as metadata.name of the schema ConfigMap and as the +configMap reference of the apiserver Deployment's schema volume). +*/}} +{{- define "michelangelo.schemaInitConfigMapName" -}} +{{- printf "%s-schema-init" (include "michelangelo.fullname" .) | trunc 63 | trimSuffix "-" -}} +{{- end -}} diff --git a/helm/michelangelo/templates/core/apiserver-configmap.yaml b/helm/michelangelo/templates/core/apiserver-configmap.yaml new file mode 100644 index 000000000..235925523 --- /dev/null +++ b/helm/michelangelo/templates/core/apiserver-configmap.yaml @@ -0,0 +1,22 @@ +{{- if .Values.apiserver.enabled }} +# ConfigMap carrying the apiserver base.yaml (listen address/port, k8s client qps/burst, +# metadata-storage and CRD-sync toggles). Rendered only when apiserver.enabled is true. +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "michelangelo.componentFullname" (dict "context" . "component" "apiserver-config") }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "michelangelo.componentLabels" (dict "context" . 
"component" "apiserver") | nindent 4 }} +data: + base.yaml: | + apiserver: + yarpc: + host: 0.0.0.0 + port: {{ .Values.apiserver.port }} + k8s: + qps: {{ .Values.apiserver.k8s.qps }} + burst: {{ .Values.apiserver.k8s.burst }} + metadataStorage: + enableMetadataStorage: {{ .Values.apiserver.metadataStorage.enable }} + crdSync: + enableCRDUpdate: {{ .Values.apiserver.crdSync.enableUpdate }} +{{- end }} diff --git a/helm/michelangelo/templates/core/apiserver-deployment.yaml b/helm/michelangelo/templates/core/apiserver-deployment.yaml new file mode 100644 index 000000000..06f8f42ce --- /dev/null +++ b/helm/michelangelo/templates/core/apiserver-deployment.yaml @@ -0,0 +1,145 @@ +{{- if .Values.apiserver.enabled }} +# apiserver Deployment (replica count is hard-coded to 1). Two init containers run before the +# server container: wait-for-metadata-storage polls the database, then schema-init applies +# /schema/init-schema.sql. +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "michelangelo.componentFullname" (dict "context" . "component" "apiserver") }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "michelangelo.componentLabels" (dict "context" . "component" "apiserver") | nindent 4 }} +spec: + replicas: 1 + selector: + matchLabels: + {{- include "michelangelo.componentSelectorLabels" (dict "context" . "component" "apiserver") | nindent 6 }} + template: + metadata: + labels: + {{- include "michelangelo.componentSelectorLabels" (dict "context" . "component" "apiserver") | nindent 8 }} + spec: + serviceAccountName: {{ include "michelangelo.serviceAccountName" . }} + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + # The init image follows metadataStorage.driver: postgres:16 for "postgres", mysql:8.0 for any other value. + initContainers: + - name: wait-for-metadata-storage + image: {{ if eq .Values.metadataStorage.driver "postgres" }}postgres:16{{ else }}mysql:8.0{{ end }} + imagePullPolicy: {{ .Values.images.pullPolicy }} + {{- with .Values.securityContext }} + securityContext: + {{- toYaml . 
| nindent 12 }} + {{- end }} + command: + - /bin/sh + - -c + - | + set -e + {{- /* Polls every 2 seconds, at most 60 times; the script exits 1 when the attempts run out. */}} + MAX_ATTEMPTS=60 + ATTEMPT=0 + while [ $ATTEMPT -lt $MAX_ATTEMPTS ]; do + {{- if eq .Values.metadataStorage.driver "postgres" }} + {{- /* NOTE(review): no -d is passed, so psql connects to the database named after $METADATA_USER - confirm that database exists. */}} + if PGPASSWORD="$METADATA_PASSWORD" psql -h "$METADATA_HOST" -p "$METADATA_PORT" -U "$METADATA_USER" -c "SELECT 1" >/dev/null 2>&1; then + {{- else }} + {{- /* --password="..." is used instead of -p"...": with an empty password the short form collapses to a bare -p, which makes the client prompt. */}} + if mysqladmin ping -h "$METADATA_HOST" -P "$METADATA_PORT" -u "$METADATA_USER" --password="$METADATA_PASSWORD" --silent; then + {{- end }} + echo "metadata storage is ready" + exit 0 + fi + ATTEMPT=$((ATTEMPT+1)) + echo "waiting for metadata storage ($ATTEMPT/$MAX_ATTEMPTS)..." + sleep 2 + done + echo "metadata storage did not become ready" + exit 1 + # Connection settings come from .Values.metadataStorage; the password is read from key `password` + # of metadataStorage.existingSecret, or of the "[fullname]-metadata-storage" Secret when that is unset. + env: + - name: METADATA_HOST + value: {{ required "metadataStorage.host is required — see helm/michelangelo/README.md#values-reference" .Values.metadataStorage.host | quote }} + - name: METADATA_PORT + value: {{ .Values.metadataStorage.port | quote }} + - name: METADATA_USER + value: {{ .Values.metadataStorage.user | default "root" | quote }} + - name: METADATA_PASSWORD + valueFrom: + secretKeyRef: + name: {{ .Values.metadataStorage.existingSecret | default (printf "%s-metadata-storage" (include "michelangelo.fullname" .)) }} + key: password + - name: schema-init + image: {{ if eq .Values.metadataStorage.driver "postgres" }}postgres:16{{ else }}mysql:8.0{{ end }} + imagePullPolicy: {{ .Values.images.pullPolicy }} + {{- with .Values.securityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + command: + - /bin/sh + - -c + - | + set -e + echo "applying ingester schema to $METADATA_DATABASE on $METADATA_HOST..." 
+ {{- if eq .Values.metadataStorage.driver "postgres" }} + {{- /* NOTE(review): the mounted init-schema.sql is rendered from files/schema/mysql-init-schema.sql (MySQL DDL), and psql runs without ON_ERROR_STOP, so SQL errors in this branch do not fail the container - a Postgres schema file is needed before this path can work. */}} + PGPASSWORD="$METADATA_PASSWORD" psql -h "$METADATA_HOST" -p "$METADATA_PORT" -U "$METADATA_USER" -d "$METADATA_DATABASE" -f /schema/init-schema.sql + {{- else }} + {{- /* --password="..." is used instead of -p"...": with an empty password the short form collapses to a bare -p, the client prompts, and without a TTY the prompt may read from stdin, which here is the schema file. */}} + {{- /* NOTE(review): no database argument is passed and METADATA_DATABASE is only echoed in this branch; the schema file presumably selects its own database - confirm. */}} + mysql -h "$METADATA_HOST" -P "$METADATA_PORT" -u "$METADATA_USER" --password="$METADATA_PASSWORD" < /schema/init-schema.sql + {{- end }} + echo "schema applied successfully" + env: + - name: METADATA_HOST + value: {{ .Values.metadataStorage.host | quote }} + - name: METADATA_PORT + value: {{ .Values.metadataStorage.port | quote }} + - name: METADATA_USER + value: {{ .Values.metadataStorage.user | default "root" | quote }} + - name: METADATA_DATABASE + value: {{ .Values.metadataStorage.database | quote }} + - name: METADATA_PASSWORD + valueFrom: + secretKeyRef: + name: {{ .Values.metadataStorage.existingSecret | default (printf "%s-metadata-storage" (include "michelangelo.fullname" .)) }} + key: password + # The `schema` volume (schema-init ConfigMap) is mounted read-only at /schema. + volumeMounts: + - name: schema + mountPath: /schema + readOnly: true + containers: + - name: apiserver + image: {{ .Values.images.apiserver }} + imagePullPolicy: {{ .Values.images.pullPolicy }} + {{- with .Values.securityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + ports: + - name: grpc + containerPort: {{ .Values.apiserver.port }} + protocol: TCP + volumeMounts: + - name: config + mountPath: /config + # Per-component resources take precedence; the chart-wide .Values.resources is the fallback. + {{- with (or .Values.apiserver.resources .Values.resources) }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + volumes: + - name: config + configMap: + name: {{ include "michelangelo.componentFullname" (dict "context" . "component" "apiserver-config") }} + - name: schema + configMap: + name: {{ include "michelangelo.schemaInitConfigMapName" . }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . 
| nindent 8 }} + {{- end }} +{{- end }} diff --git a/helm/michelangelo/templates/core/apiserver-schema-init-configmap.yaml b/helm/michelangelo/templates/core/apiserver-schema-init-configmap.yaml new file mode 100644 index 000000000..93c6e253f --- /dev/null +++ b/helm/michelangelo/templates/core/apiserver-schema-init-configmap.yaml @@ -0,0 +1,12 @@ +{{- if .Values.apiserver.enabled }} +# ConfigMap holding init-schema.sql, embedded from the chart file files/schema/mysql-init-schema.sql. +# NOTE(review): the embedded file is the MySQL schema whatever metadataStorage.driver is set to. +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "michelangelo.schemaInitConfigMapName" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "michelangelo.componentLabels" (dict "context" . "component" "apiserver") | nindent 4 }} +data: + init-schema.sql: | +{{ .Files.Get "files/schema/mysql-init-schema.sql" | indent 4 }} +{{- end }} diff --git a/helm/michelangelo/templates/core/apiserver-service.yaml b/helm/michelangelo/templates/core/apiserver-service.yaml new file mode 100644 index 000000000..3a3b938f2 --- /dev/null +++ b/helm/michelangelo/templates/core/apiserver-service.yaml @@ -0,0 +1,20 @@ +{{- if .Values.apiserver.enabled }} +# Service for the apiserver; its type comes from apiserver.service.type and its selector is the +# apiserver component selector labels. +apiVersion: v1 +kind: Service +metadata: + name: {{ include "michelangelo.componentFullname" (dict "context" . "component" "apiserver") }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "michelangelo.componentLabels" (dict "context" . "component" "apiserver") | nindent 4 }} +spec: + type: {{ .Values.apiserver.service.type }} + selector: + {{- include "michelangelo.componentSelectorLabels" (dict "context" . 
"component" "apiserver") | nindent 4 }} + # targetPort names the container port `grpc`; nodePort is emitted only for type NodePort, where it is mandatory (required). + ports: + - name: grpc + port: {{ .Values.apiserver.port }} + targetPort: grpc + {{- if eq .Values.apiserver.service.type "NodePort" }} + nodePort: {{ required "apiserver.service.nodePort is required when apiserver.service.type=NodePort" .Values.apiserver.service.nodePort }} + {{- end }} +{{- end }} diff --git a/helm/michelangelo/templates/core/controllermgr-configmap.yaml b/helm/michelangelo/templates/core/controllermgr-configmap.yaml new file mode 100644 index 000000000..9712a4bd1 --- /dev/null +++ b/helm/michelangelo/templates/core/controllermgr-configmap.yaml @@ -0,0 +1,71 @@ +{{- if .Values.controllermgr.enabled }} +# ConfigMap carrying the controllermgr base.yaml. The ${AWS_ACCESS_KEY_ID}, ${AWS_SECRET_ACCESS_KEY} +# and ${MYSQL_PASSWORD} placeholders are emitted literally by Helm; presumably the service expands +# them from its environment at startup - confirm. +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "michelangelo.componentFullname" (dict "context" . "component" "controllermgr-config") }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "michelangelo.componentLabels" (dict "context" . "component" "controllermgr") | nindent 4 }} +data: + base.yaml: | + controllermgr: + metricsBindAddress: ":{{ .Values.controllermgr.metricsPort }}" + healthProbeBindAddress: ":{{ .Values.controllermgr.healthPort }}" + leaderElection: {{ .Values.controllermgr.leaderElection }} + leaderElectionID: {{ .Values.controllermgr.leaderElectionID | quote }} + + controllers: + rayCluster: + k8sQps: {{ .Values.controllermgr.controllers.rayCluster.qps }} + k8sBurst: {{ .Values.controllermgr.controllers.rayCluster.burst }} + + minio: + awsRegion: {{ .Values.objectStorage.region | default "us-east-1" | quote }} + awsAccessKeyId: ${AWS_ACCESS_KEY_ID} + awsSecretAccessKey: ${AWS_SECRET_ACCESS_KEY} + awsEndpointUrl: {{ required "objectStorage.endpoint is required — see helm/michelangelo/README.md#values-reference" .Values.objectStorage.endpoint | quote }} + secure: {{ .Values.objectStorage.secure }} + + metadataStorage: + enableMetadataStorage: {{ .Values.controllermgr.metadataStorage.enable }} + deletionDelay: {{ .Values.controllermgr.metadataStorage.deletionDelay | 
quote }} + enableResourceVersionCache: {{ .Values.controllermgr.metadataStorage.enableResourceVersionCache }} + {{- /* The section below is always named `mysql` and is filled from .Values.metadataStorage; host is mandatory (required). */}} + + mysql: + host: {{ required "metadataStorage.host is required — see helm/michelangelo/README.md#values-reference" .Values.metadataStorage.host | quote }} + port: {{ .Values.metadataStorage.port }} + user: {{ .Values.metadataStorage.user | default "root" | quote }} + password: ${MYSQL_PASSWORD} + database: {{ .Values.metadataStorage.database | quote }} + + ingester: + concurrentReconciles: {{ .Values.controllermgr.ingester.concurrentReconciles }} + requeuePeriod: {{ .Values.controllermgr.ingester.requeuePeriod | quote }} + concurrentReconcilesMap: + {{- /* NOTE(review): when the values map is empty, nothing is emitted under this key, so it renders with no value. */}} + {{- range $k, $v := .Values.controllermgr.ingester.concurrentReconcilesMap }} + {{ $k }}: {{ $v }} + {{- end }} + + workflowClient: + {{- /* Exactly one engine branch is rendered; any workflow.engine other than "cadence" or "temporal" aborts rendering via fail. */}} + {{- if eq .Values.workflow.engine "cadence" }} + service: cadence-frontend + host: {{ required "workflow.endpoint is required — see helm/michelangelo/README.md#values-reference" .Values.workflow.endpoint | quote }} + transport: grpc + domain: {{ .Values.workflow.domain | default "default" | quote }} + taskList: default + {{- with .Values.controllermgr.workflowClient.executionUrlFormat }} + executionUrlFormat: {{ . | quote }} + {{- end }} + {{- else if eq .Values.workflow.engine "temporal" }} + service: temporal-frontend + host: {{ required "workflow.endpoint is required — see helm/michelangelo/README.md#values-reference" .Values.workflow.endpoint | quote }} + transport: grpc + domain: {{ .Values.workflow.domain | default "default" | quote }} + provider: Temporal + {{- with .Values.controllermgr.workflowClient.executionUrlFormat }} + executionUrlFormat: {{ . 
| quote }} + {{- end }} + {{- else }} + {{- fail (printf "workflow.engine must be 'cadence' or 'temporal', got: %s" .Values.workflow.engine) }} + {{- end }} +{{- end }} diff --git a/helm/michelangelo/templates/core/controllermgr-deployment.yaml b/helm/michelangelo/templates/core/controllermgr-deployment.yaml new file mode 100644 index 000000000..c328bf6b3 --- /dev/null +++ b/helm/michelangelo/templates/core/controllermgr-deployment.yaml @@ -0,0 +1,75 @@ +{{- if .Values.controllermgr.enabled }} +# controllermgr Deployment (replica count is hard-coded to 1) with a single container named `app`. +# Rendered only when controllermgr.enabled is true. +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "michelangelo.componentFullname" (dict "context" . "component" "controllermgr") }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "michelangelo.componentLabels" (dict "context" . "component" "controllermgr") | nindent 4 }} +spec: + replicas: 1 + selector: + matchLabels: + {{- include "michelangelo.componentSelectorLabels" (dict "context" . "component" "controllermgr") | nindent 6 }} + template: + metadata: + labels: + {{- include "michelangelo.componentSelectorLabels" (dict "context" . "component" "controllermgr") | nindent 8 }} + spec: + serviceAccountName: {{ include "michelangelo.serviceAccountName" . }} + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: app + image: {{ .Values.images.controllermgr }} + imagePullPolicy: {{ .Values.images.pullPolicy }} + {{- with .Values.securityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + # envFrom exposes every key of the object-storage Secret as an environment variable; + # objectStorage.existingSecret takes precedence over the chart-generated Secret name. + envFrom: + - secretRef: + name: {{ .Values.objectStorage.existingSecret | default (include "michelangelo.objectStorageSecretName" .) 
}} + # MYSQL_PASSWORD is read from key `password` of metadataStorage.existingSecret, or of the + # "[fullname]-metadata-storage" Secret when that is unset; it presumably fills the + # ${MYSQL_PASSWORD} placeholder in the controllermgr config - confirm. + env: + - name: MYSQL_PASSWORD + valueFrom: + secretKeyRef: + name: {{ .Values.metadataStorage.existingSecret | default (printf "%s-metadata-storage" (include "michelangelo.fullname" .)) }} + key: password + ports: + - name: metrics + containerPort: {{ .Values.controllermgr.metricsPort }} + protocol: TCP + - name: health + containerPort: {{ .Values.controllermgr.healthPort }} + protocol: TCP + volumeMounts: + - name: config + mountPath: /config + # Per-component resources take precedence; the chart-wide .Values.resources is the fallback. + {{- with (or .Values.controllermgr.resources .Values.resources) }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + volumes: + - name: config + configMap: + name: {{ include "michelangelo.componentFullname" (dict "context" . "component" "controllermgr-config") }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} diff --git a/helm/michelangelo/templates/core/controllermgr-service.yaml b/helm/michelangelo/templates/core/controllermgr-service.yaml new file mode 100644 index 000000000..fccfd2fed --- /dev/null +++ b/helm/michelangelo/templates/core/controllermgr-service.yaml @@ -0,0 +1,20 @@ +{{- if .Values.controllermgr.enabled }} +# Service for controllermgr; the type is fixed to ClusterIP and it exposes the metrics and health ports. +apiVersion: v1 +kind: Service +metadata: + name: {{ include "michelangelo.componentFullname" (dict "context" . "component" "controllermgr") }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "michelangelo.componentLabels" (dict "context" . "component" "controllermgr") | nindent 4 }} +spec: + type: ClusterIP + selector: + {{- include "michelangelo.componentSelectorLabels" (dict "context" . 
"component" "controllermgr") | nindent 4 }} + ports: + - name: metrics + port: {{ .Values.controllermgr.metricsPort }} + targetPort: metrics + - name: health + port: {{ .Values.controllermgr.healthPort }} + targetPort: health +{{- end }} diff --git a/helm/michelangelo/templates/core/envoy-configmap.yaml b/helm/michelangelo/templates/core/envoy-configmap.yaml new file mode 100644 index 000000000..5a94db50c --- /dev/null +++ b/helm/michelangelo/templates/core/envoy-configmap.yaml @@ -0,0 +1,67 @@ +{{- if .Values.envoy.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "michelangelo.componentFullname" (dict "context" . "component" "envoy-config") }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "michelangelo.componentLabels" (dict "context" . "component" "envoy") | nindent 4 }} +data: + envoy.yaml: | + static_resources: + listeners: + - name: michelangelo-ui-listener + address: + socket_address: { address: 0.0.0.0, port_value: {{ .Values.envoy.port }} } + filter_chains: + - filters: + - name: envoy.filters.network.http_connection_manager + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager + stat_prefix: ingress_http + codec_type: AUTO + route_config: + name: local_route + virtual_hosts: + - name: backend + domains: ["*"] + routes: + - match: + prefix: "/" + route: + cluster: michelangelo-apiserver + max_grpc_timeout: 0s + cors: + allow_origin_string_match: + - safe_regex: + regex: {{ .Values.envoy.corsOrigins | default ".*" | quote }} + allow_methods: "GET, POST, OPTIONS" + allow_headers: "content-type,context-ttl-ms,grpc-timeout,rpc-caller,rpc-encoding,rpc-service,x-grpc-web,x-user-agent" + max_age: "1728000" + expose_headers: "grpc-status,grpc-message" + http_filters: + - name: envoy.filters.http.grpc_web + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.grpc_web.v3.GrpcWeb + - name: envoy.filters.http.cors + typed_config: + 
"@type": type.googleapis.com/envoy.extensions.filters.http.cors.v3.Cors + - name: envoy.filters.http.router + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router + clusters: + - name: michelangelo-apiserver + connect_timeout: 1s + type: LOGICAL_DNS + http2_protocol_options: {} + lb_policy: ROUND_ROBIN + load_assignment: + cluster_name: michelangelo-apiserver + endpoints: + - lb_endpoints: + - endpoint: + address: + socket_address: + address: {{ include "michelangelo.componentFullname" (dict "context" . "component" "apiserver") }} + port_value: {{ .Values.apiserver.port }} +{{- end }} diff --git a/helm/michelangelo/templates/core/envoy-deployment.yaml b/helm/michelangelo/templates/core/envoy-deployment.yaml new file mode 100644 index 000000000..b2867d5d9 --- /dev/null +++ b/helm/michelangelo/templates/core/envoy-deployment.yaml @@ -0,0 +1,64 @@ +{{- if .Values.envoy.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "michelangelo.componentFullname" (dict "context" . "component" "envoy") }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "michelangelo.componentLabels" (dict "context" . "component" "envoy") | nindent 4 }} +spec: + replicas: 1 + selector: + matchLabels: + {{- include "michelangelo.componentSelectorLabels" (dict "context" . "component" "envoy") | nindent 6 }} + template: + metadata: + labels: + {{- include "michelangelo.componentSelectorLabels" (dict "context" . "component" "envoy") | nindent 8 }} + spec: + serviceAccountName: {{ include "michelangelo.serviceAccountName" . }} + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: envoy + image: {{ .Values.images.envoy }} + imagePullPolicy: {{ .Values.images.pullPolicy }} + {{- with .Values.securityContext }} + securityContext: + {{- toYaml . 
| nindent 12 }} + {{- end }} + args: ["-c", "/etc/envoy/envoy.yaml"] + ports: + - name: http + containerPort: {{ .Values.envoy.port }} + protocol: TCP + volumeMounts: + - name: config + mountPath: /etc/envoy + {{- with (or .Values.envoy.resources .Values.resources) }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + volumes: + - name: config + configMap: + name: {{ include "michelangelo.componentFullname" (dict "context" . "component" "envoy-config") }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} diff --git a/helm/michelangelo/templates/core/envoy-service.yaml b/helm/michelangelo/templates/core/envoy-service.yaml new file mode 100644 index 000000000..4c8d52ae4 --- /dev/null +++ b/helm/michelangelo/templates/core/envoy-service.yaml @@ -0,0 +1,20 @@ +{{- if .Values.envoy.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "michelangelo.componentFullname" (dict "context" . "component" "envoy") }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "michelangelo.componentLabels" (dict "context" . "component" "envoy") | nindent 4 }} +spec: + type: {{ .Values.envoy.service.type }} + selector: + {{- include "michelangelo.componentSelectorLabels" (dict "context" . 
"component" "envoy") | nindent 4 }} + ports: + - name: http + port: {{ .Values.envoy.port }} + targetPort: http + {{- if eq .Values.envoy.service.type "NodePort" }} + nodePort: {{ required "envoy.service.nodePort is required when envoy.service.type=NodePort" .Values.envoy.service.nodePort }} + {{- end }} +{{- end }} diff --git a/helm/michelangelo/templates/core/metadata-storage-secret.yaml b/helm/michelangelo/templates/core/metadata-storage-secret.yaml new file mode 100644 index 000000000..a2b353b8b --- /dev/null +++ b/helm/michelangelo/templates/core/metadata-storage-secret.yaml @@ -0,0 +1,14 @@ +{{- if and .Values.metadataStorage.rootPassword (not .Values.metadataStorage.existingSecret) }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ printf "%s-metadata-storage" (include "michelangelo.fullname" .) }} + namespace: {{ .Release.Namespace }} + annotations: + helm.sh/resource-policy: keep + labels: + {{- include "michelangelo.labels" . | nindent 4 }} +type: Opaque +stringData: + password: {{ .Values.metadataStorage.rootPassword | quote }} +{{- end }} diff --git a/helm/michelangelo/templates/core/object-storage-secret.yaml b/helm/michelangelo/templates/core/object-storage-secret.yaml new file mode 100644 index 000000000..0f559506a --- /dev/null +++ b/helm/michelangelo/templates/core/object-storage-secret.yaml @@ -0,0 +1,15 @@ +{{- if not .Values.objectStorage.existingSecret }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "michelangelo.objectStorageSecretName" . }} + namespace: {{ .Release.Namespace }} + annotations: + helm.sh/resource-policy: keep + labels: + {{- include "michelangelo.labels" . 
| nindent 4 }} +type: Opaque +stringData: + AWS_ACCESS_KEY_ID: {{ required "objectStorage.accessKeyId is required when objectStorage.existingSecret is not set — see helm/michelangelo/README.md#values-reference" .Values.objectStorage.accessKeyId | quote }} + AWS_SECRET_ACCESS_KEY: {{ required "objectStorage.secretAccessKey is required when objectStorage.existingSecret is not set — see helm/michelangelo/README.md#values-reference" .Values.objectStorage.secretAccessKey | quote }} +{{- end }} diff --git a/helm/michelangelo/templates/core/ui-configmap.yaml b/helm/michelangelo/templates/core/ui-configmap.yaml new file mode 100644 index 000000000..0d8889dbd --- /dev/null +++ b/helm/michelangelo/templates/core/ui-configmap.yaml @@ -0,0 +1,14 @@ +{{- if .Values.ui.enabled }}{{- /* config.json is mounted into the UI container's web root; toJson (not quote) keeps apiBaseUrl a valid JSON string. */}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "michelangelo.componentFullname" (dict "context" . "component" "ui-config") }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "michelangelo.componentLabels" (dict "context" . "component" "ui") | nindent 4 }} +data: + config.json: | + { + "apiBaseUrl": {{ required "ui.apiBaseUrl is required when ui.enabled=true — see helm/michelangelo/README.md#values-reference" .Values.ui.apiBaseUrl | toJson }} + } +{{- end }} diff --git a/helm/michelangelo/templates/core/ui-deployment.yaml b/helm/michelangelo/templates/core/ui-deployment.yaml new file mode 100644 index 000000000..709ad8093 --- /dev/null +++ b/helm/michelangelo/templates/core/ui-deployment.yaml @@ -0,0 +1,64 @@ +{{- if .Values.ui.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "michelangelo.componentFullname" (dict "context" . "component" "ui") }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "michelangelo.componentLabels" (dict "context" . "component" "ui") | nindent 4 }} +spec: + replicas: 1 + selector: + matchLabels: + {{- include "michelangelo.componentSelectorLabels" (dict "context" .
"component" "ui") | nindent 6 }} + template: + metadata: + labels: + {{- include "michelangelo.componentSelectorLabels" (dict "context" . "component" "ui") | nindent 8 }} + spec: + serviceAccountName: {{ include "michelangelo.serviceAccountName" . }} + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: ui + image: {{ .Values.images.ui }} + imagePullPolicy: {{ .Values.images.pullPolicy }} + {{- with .Values.securityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + ports: + - name: http + containerPort: 80 + protocol: TCP + volumeMounts: + - name: public-config + mountPath: /usr/share/nginx/html/config.json + subPath: config.json + {{- with (or .Values.ui.resources .Values.resources) }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + volumes: + - name: public-config + configMap: + name: {{ include "michelangelo.componentFullname" (dict "context" . "component" "ui-config") }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} diff --git a/helm/michelangelo/templates/core/ui-service.yaml b/helm/michelangelo/templates/core/ui-service.yaml new file mode 100644 index 000000000..9ccf31346 --- /dev/null +++ b/helm/michelangelo/templates/core/ui-service.yaml @@ -0,0 +1,20 @@ +{{- if .Values.ui.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "michelangelo.componentFullname" (dict "context" . "component" "ui") }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "michelangelo.componentLabels" (dict "context" . 
"component" "ui") | nindent 4 }} +spec: + type: {{ .Values.ui.service.type }} + selector: + {{- include "michelangelo.componentSelectorLabels" (dict "context" . "component" "ui") | nindent 4 }} + ports: + - name: http + port: {{ .Values.ui.service.port }} + targetPort: http + {{- if eq .Values.ui.service.type "NodePort" }} + nodePort: {{ required "ui.service.nodePort is required when ui.service.type=NodePort" .Values.ui.service.nodePort }} + {{- end }} +{{- end }} diff --git a/helm/michelangelo/templates/core/worker-configmap.yaml b/helm/michelangelo/templates/core/worker-configmap.yaml new file mode 100644 index 000000000..031b97633 --- /dev/null +++ b/helm/michelangelo/templates/core/worker-configmap.yaml @@ -0,0 +1,44 @@ +{{- if .Values.worker.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "michelangelo.componentFullname" (dict "context" . "component" "worker-config") }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "michelangelo.componentLabels" (dict "context" . "component" "worker") | nindent 4 }} +data: + base.yaml: | + worker: + address: {{ include "michelangelo.componentFullname" (dict "context" . 
"component" "apiserver") }}:{{ .Values.apiserver.port }} + maApiServiceName: ma-apiserver + + logging: + level: {{ .Values.worker.logging.level | default "info" }} + development: {{ .Values.worker.logging.development | default false }} + encoding: {{ .Values.worker.logging.encoding | default "console" }} + + workflow-engine: + host: {{ required "workflow.endpoint is required — see helm/michelangelo/README.md#values-reference" .Values.workflow.endpoint | quote }} + transport: grpc + {{- if eq .Values.workflow.engine "cadence" }} + provider: cadence + {{- else if eq .Values.workflow.engine "temporal" }} + provider: temporal + {{- else }} + {{- fail (printf "workflow.engine must be 'cadence' or 'temporal', got: %s" .Values.workflow.engine) }} + {{- end }} + workers: + {{- range $tl := .Values.workflow.taskLists }} + - domain: {{ $.Values.workflow.domain | default "default" | quote }} + taskList: {{ $tl | quote }} + {{- end }} + client: + domain: {{ .Values.workflow.domain | default "default" | quote }} + + minio: + awsRegion: {{ .Values.objectStorage.region | default "us-east-1" | quote }} + awsAccessKeyId: ${AWS_ACCESS_KEY_ID} + awsSecretAccessKey: ${AWS_SECRET_ACCESS_KEY} + awsEndpointUrl: {{ required "objectStorage.endpoint is required — see helm/michelangelo/README.md#values-reference" .Values.objectStorage.endpoint | quote }} + secure: {{ .Values.objectStorage.secure }} +{{- end }} diff --git a/helm/michelangelo/templates/core/worker-deployment.yaml b/helm/michelangelo/templates/core/worker-deployment.yaml new file mode 100644 index 000000000..40dc461ab --- /dev/null +++ b/helm/michelangelo/templates/core/worker-deployment.yaml @@ -0,0 +1,62 @@ +{{- if .Values.worker.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "michelangelo.componentFullname" (dict "context" . "component" "worker") }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "michelangelo.componentLabels" (dict "context" . 
"component" "worker") | nindent 4 }} +spec: + replicas: {{ .Values.worker.replicas }} + selector: + matchLabels: + {{- include "michelangelo.componentSelectorLabels" (dict "context" . "component" "worker") | nindent 6 }} + template: + metadata: + labels: + {{- include "michelangelo.componentSelectorLabels" (dict "context" . "component" "worker") | nindent 8 }} + spec: + serviceAccountName: {{ include "michelangelo.serviceAccountName" . }} + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: app + image: {{ .Values.images.worker }} + imagePullPolicy: {{ .Values.images.pullPolicy }} + {{- with .Values.securityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + envFrom: + - secretRef: + name: {{ .Values.objectStorage.existingSecret | default (include "michelangelo.objectStorageSecretName" .) }} + volumeMounts: + - name: worker-config + mountPath: /config + {{- with (or .Values.worker.resources .Values.resources) }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + volumes: + - name: worker-config + configMap: + name: {{ include "michelangelo.componentFullname" (dict "context" . "component" "worker-config") }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . 
| nindent 8 }} + {{- end }} +{{- end }} diff --git a/helm/michelangelo/templates/rbac/clusterrole.yaml b/helm/michelangelo/templates/rbac/clusterrole.yaml new file mode 100644 index 000000000..aaef156ad --- /dev/null +++ b/helm/michelangelo/templates/rbac/clusterrole.yaml @@ -0,0 +1,171 @@ +{{- if and .Values.serviceAccount.create (not .Values.controllermgr.watchNamespace) -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "michelangelo.fullname" . }} + labels: + {{- include "michelangelo.labels" . | nindent 4 }} +rules: + # CRD lifecycle — apiserver crdSync registers/updates CRDs at startup + - apiGroups: ["apiextensions.k8s.io"] + resources: ["customresourcedefinitions"] + verbs: ["get", "list", "watch", "create", "update", "patch"] + + # Michelangelo CRs — full reconcile lifecycle + - apiGroups: ["michelangelo.api"] + resources: + - projects + - models + - modelfamilies + - pipelines + - pipelineruns + - revisions + - inferenceservers + - clusters + - rayclusters + - rayjobs + - sparkjobs + - triggerruns + - deployments + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + + # Michelangelo CR status subresource updates (controller pattern) + - apiGroups: ["michelangelo.api"] + resources: + - projects/status + - models/status + - modelfamilies/status + - pipelines/status + - pipelineruns/status + - revisions/status + - inferenceservers/status + - clusters/status + - rayclusters/status + - rayjobs/status + - sparkjobs/status + - triggerruns/status + - deployments/status + verbs: ["get", "update", "patch"] + + # KubeRay CRs — controllermgr creates/manages RayCluster + RayJob + - apiGroups: ["ray.io"] + resources: ["rayclusters", "rayjobs"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + + # Spark Operator CRs — controllermgr creates SparkApplication + - apiGroups: ["sparkoperator.k8s.io"] + resources: ["sparkapplications"] + verbs: ["get", "list", "watch", "create", 
"update", "patch", "delete"] + + # Inference deployments use core workloads + networking + - apiGroups: ["apps"] + resources: ["deployments"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + - apiGroups: [""] + resources: ["services"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "watch"] + - apiGroups: [""] + resources: ["pods/log"] + verbs: ["get", "list"] + + # ConfigMaps + Secrets — controllers materialize per-job configs and read + # cluster credentials (compute-cluster Secrets created by sandbox.py) + - apiGroups: [""] + resources: ["configmaps", "secrets"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + + # Namespaces — controllers list namespaces to scope reconcilers + - apiGroups: [""] + resources: ["namespaces"] + verbs: ["get", "list", "watch"] + + # Events — controllers emit events on CR objects + - apiGroups: [""] + resources: ["events"] + verbs: ["create", "patch"] + + # Leader election (controllermgr.leaderElection: false today, but enabling + # it later requires lease access — keep in least-privilege baseline) + - apiGroups: ["coordination.k8s.io"] + resources: ["leases"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + + # Inference demo uses Gateway API HTTPRoutes + - apiGroups: ["gateway.networking.k8s.io"] + resources: ["httproutes"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] +{{- else if and .Values.serviceAccount.create .Values.controllermgr.watchNamespace }} +{{- range $ns := .Values.controllermgr.watchNamespace }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ include "michelangelo.fullname" $ }} + namespace: {{ $ns }} + labels: + {{- include "michelangelo.labels" $ | nindent 4 }} +rules: + - apiGroups: ["michelangelo.api"] + resources: + - projects + - models + - modelfamilies + - pipelines + - pipelineruns 
+ - revisions + - inferenceservers + - clusters + - rayclusters + - rayjobs + - sparkjobs + - triggerruns + - deployments + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + - apiGroups: ["michelangelo.api"] + resources: + - projects/status + - models/status + - modelfamilies/status + - pipelines/status + - pipelineruns/status + - revisions/status + - inferenceservers/status + - clusters/status + - rayclusters/status + - rayjobs/status + - sparkjobs/status + - triggerruns/status + - deployments/status + verbs: ["get", "update", "patch"] + - apiGroups: ["ray.io"] + resources: ["rayclusters", "rayjobs"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + - apiGroups: ["sparkoperator.k8s.io"] + resources: ["sparkapplications"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + - apiGroups: ["apps"] + resources: ["deployments"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + - apiGroups: [""] + resources: ["services", "configmaps", "secrets"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "watch"] + - apiGroups: [""] + resources: ["pods/log"] + verbs: ["get", "list"] + - apiGroups: [""] + resources: ["events"] + verbs: ["create", "patch"] + - apiGroups: ["coordination.k8s.io"] + resources: ["leases"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + - apiGroups: ["gateway.networking.k8s.io"] + resources: ["httproutes"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] +{{- end }} +{{- end }} diff --git a/helm/michelangelo/templates/rbac/clusterrolebinding.yaml b/helm/michelangelo/templates/rbac/clusterrolebinding.yaml new file mode 100644 index 000000000..183adb28f --- /dev/null +++ b/helm/michelangelo/templates/rbac/clusterrolebinding.yaml @@ -0,0 +1,35 @@ +{{- if and .Values.serviceAccount.create (not 
.Values.controllermgr.watchNamespace) -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "michelangelo.fullname" . }} + labels: + {{- include "michelangelo.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ include "michelangelo.fullname" . }} +subjects: + - kind: ServiceAccount + name: {{ include "michelangelo.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} +{{- else if and .Values.serviceAccount.create .Values.controllermgr.watchNamespace }} +{{- range $ns := .Values.controllermgr.watchNamespace }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ include "michelangelo.fullname" $ }} + namespace: {{ $ns }} + labels: + {{- include "michelangelo.labels" $ | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ include "michelangelo.fullname" $ }} +subjects: + - kind: ServiceAccount + name: {{ include "michelangelo.serviceAccountName" $ }} + namespace: {{ $.Release.Namespace }} +{{- end }} +{{- end }} diff --git a/helm/michelangelo/templates/rbac/serviceaccount.yaml b/helm/michelangelo/templates/rbac/serviceaccount.yaml new file mode 100644 index 000000000..516b16202 --- /dev/null +++ b/helm/michelangelo/templates/rbac/serviceaccount.yaml @@ -0,0 +1,13 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "michelangelo.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "michelangelo.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . 
| nindent 4 }} + {{- end }} +{{- end }} diff --git a/helm/michelangelo/templates/tests/test-connection.yaml b/helm/michelangelo/templates/tests/test-connection.yaml new file mode 100644 index 000000000..8fb236b48 --- /dev/null +++ b/helm/michelangelo/templates/tests/test-connection.yaml @@ -0,0 +1,25 @@ +{{- if .Values.apiserver.enabled }}{{- /* helm test hook: TCP-probes the apiserver Service port with busybox nc; -w 5 bounds the connect wait so the test cannot hang. */}} +apiVersion: v1 +kind: Pod +metadata: + name: {{ include "michelangelo.fullname" . }}-test-connection + namespace: {{ .Release.Namespace }} + labels: + {{- include "michelangelo.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded +spec: + restartPolicy: Never + containers: + - name: nc + image: busybox:1.36 + command: + - /bin/sh + - -c + - | + set -e + echo "checking apiserver gRPC port {{ .Values.apiserver.port }}..." + nc -zv -w 5 {{ include "michelangelo.componentFullname" (dict "context" . "component" "apiserver") }} {{ .Values.apiserver.port }} + echo "apiserver is reachable" +{{- end }} diff --git a/helm/michelangelo/values-k3d.yaml b/helm/michelangelo/values-k3d.yaml new file mode 100644 index 000000000..9bc149c10 --- /dev/null +++ b/helm/michelangelo/values-k3d.yaml @@ -0,0 +1,69 @@ +# ----------------------------------------------------------------------------- +# Local k3d overrides for the michelangelo chart. +# +# Used by `michelangelo sandbox up`, which provisions MySQL, MinIO, and Cadence +# in the same cluster, then runs: +# +# helm install michelangelo ./helm/michelangelo -f values-k3d.yaml +# +# The NodePort values below must match the `--port` flags k3d was created with.
+# ----------------------------------------------------------------------------- + +metadataStorage: + driver: mysql + host: mysql + port: 3306 + database: michelangelo + rootPassword: root + +objectStorage: + endpoint: minio:9000 + secure: false + region: us-east-1 + bucket: michelangelo + accessKeyId: minioadmin + secretAccessKey: minioadmin + +workflow: + engine: cadence + # Cadence gRPC frontend. Source: michelangelo-worker.yaml:42 and + # michelangelo-controllermgr.yaml:101 both use port 7833. Port 7933 is the + # TChannel endpoint used by the Cadence CLI, not by the worker. + endpoint: cadence:7833 + domain: default + +images: + pullPolicy: IfNotPresent + +apiserver: + service: + type: NodePort + nodePort: 30009 + +envoy: + corsOrigins: "http://localhost:[0-9]+" + service: + type: NodePort + nodePort: 30010 + +ui: + apiBaseUrl: "http://localhost:8081" + service: + type: NodePort + nodePort: 30011 + +# Sandbox is single-node; no scheduling constraints. +nodeSelector: {} +tolerations: [] +affinity: {} + +# Looser security context for sandbox — ingester writes to /tmp and some +# debug images need privilege escalation. Production keeps the chart defaults. +podSecurityContext: + runAsNonRoot: false + runAsUser: 0 +securityContext: + allowPrivilegeEscalation: true + readOnlyRootFilesystem: false + capabilities: + drop: [] diff --git a/helm/michelangelo/values.yaml b/helm/michelangelo/values.yaml new file mode 100644 index 000000000..4c38a9359 --- /dev/null +++ b/helm/michelangelo/values.yaml @@ -0,0 +1,316 @@ +# ----------------------------------------------------------------------------- +# Michelangelo Helm chart values +# +# This chart installs the Michelangelo control plane only. Infrastructure +# (metadata storage, object storage, workflow engine) must be reachable from +# inside the cluster — the chart consumes endpoints and credentials, it does +# not provision them. 
+# +# For a local development setup with all infrastructure provided, use the +# accompanying `values-k3d.yaml` together with `michelangelo sandbox up`. +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# Metadata storage — backing database for Michelangelo CRDs and workflow state. +# ----------------------------------------------------------------------------- +metadataStorage: + # "mysql" or "postgres". Selects the schema-init image and JDBC dialect. + driver: mysql + + # REQUIRED. Hostname of the metadata storage server. + # Examples: "my-rds.us-east-1.rds.amazonaws.com", "mysql.michelangelo.svc.cluster.local" + # Install fails fast with a clear error if this is empty. + host: "" + + # Port. Defaults to 3306 for MySQL, 5432 for Postgres. + port: 3306 + + # Database name. Created by the schema-init container if it does not exist. + database: michelangelo + + # Database user. Defaults to root. + user: root + + # REQUIRED. Root password. Used by both the schema-init container and the + # runtime services. Pass via `--set` from a secrets manager — never commit. + # If you prefer to manage this via an external Secret, set + # `metadataStorage.existingSecret` instead and leave this empty. + rootPassword: "" + + # Optional. Name of a pre-existing Secret with a `password` key. When set, + # the chart reads the root password from this Secret and ignores + # `rootPassword` above. + existingSecret: "" + +# ----------------------------------------------------------------------------- +# Object storage — S3-compatible artifact, model, and log store. +# ----------------------------------------------------------------------------- +objectStorage: + # REQUIRED. S3-compatible endpoint. + # Examples: "s3.amazonaws.com", "storage.googleapis.com", "minio:9000" + endpoint: "" + + # TLS. Set false only for in-cluster MinIO without TLS termination. 
+ secure: true + + # AWS region. Used by S3, GCS HMAC, and MinIO. The chart sets AWS_REGION + # in every service that talks to object storage. + region: us-east-1 + + # Bucket name for artifacts, models, and logs. + bucket: michelangelo + + # REQUIRED unless `existingSecret` is set. Access key ID for object storage. + accessKeyId: "" + + # REQUIRED unless `existingSecret` is set. Secret access key for object storage. + secretAccessKey: "" + + # Optional. Name of a pre-existing Secret with `AWS_ACCESS_KEY_ID` and + # `AWS_SECRET_ACCESS_KEY` keys. When set, the chart consumes this Secret + # and ignores `accessKeyId` / `secretAccessKey` above. + # + # The chart-managed Secret is annotated `helm.sh/resource-policy: keep`, + # so Helm never deletes it (e.g. on `helm uninstall`); upgrades still update it. + existingSecret: "" + +# ----------------------------------------------------------------------------- +# Workflow engine — Cadence or Temporal. Required by worker and apiserver. +# ----------------------------------------------------------------------------- +workflow: + # "cadence" or "temporal". Mutually exclusive — picks which provider block + # the worker config renders. + engine: cadence + + # REQUIRED. Workflow engine address. + # Cadence example: "cadence-frontend:7833" + # Temporal example: "temporal-frontend:7233" + endpoint: "" + + # Cadence domain or Temporal namespace. Created out-of-band — the chart + # does not register domains or namespaces. + domain: default + + # Task lists / queues used by the worker. Override only if you need to + # isolate workloads across multiple worker deployments. + taskLists: + - default + - trigger_run + - pipeline_run + +# ----------------------------------------------------------------------------- +# Container images. Override to pin a version or use a mirror.
+# ----------------------------------------------------------------------------- +images: + apiserver: ghcr.io/michelangelo-ai/apiserver:main + worker: ghcr.io/michelangelo-ai/worker:main + ui: ghcr.io/michelangelo-ai/ui:main + controllermgr: ghcr.io/michelangelo-ai/controllermgr:main + envoy: envoyproxy/envoy:v1.29-latest + + # IfNotPresent (default), Always, or Never. Use Always when pinning to + # mutable tags like ":main" in development. + pullPolicy: IfNotPresent + +# Image pull secrets for private registries. List of Secret names. +# Example: [{ name: ghcr-creds }] +imagePullSecrets: [] + +# ----------------------------------------------------------------------------- +# apiserver — gRPC API server. Required for any Michelangelo functionality. +# ----------------------------------------------------------------------------- +apiserver: + # Disable the apiserver Deployment. Useful for ops-only installs that want + # the controller manager but no API surface. + enabled: true + + # gRPC port. Other services find the apiserver at this port via Service DNS. + port: 15566 + + # Kubernetes API client tuning for the apiserver. + k8s: + qps: 300 + burst: 600 + + # Apiserver-specific config knobs. Defaults match the source sandbox YAML. + metadataStorage: + enable: false + crdSync: + enableUpdate: true + + # Resource requests and limits. Override per-service if you need different + # allocations than the global `resources` block below. + resources: {} + + service: + # ClusterIP (default), NodePort, or LoadBalancer. + type: ClusterIP + + # Required when type=NodePort. Ignored otherwise. + nodePort: null + +# ----------------------------------------------------------------------------- +# envoy — gRPC-Web proxy. Required for the browser UI. +# ----------------------------------------------------------------------------- +envoy: + enabled: true + + # gRPC-Web port. The UI's `apiBaseUrl` should point here. + port: 8081 + + # CORS origin regex. 
The UI must be served from an origin that matches. + # Local dev: "http://localhost:[0-9]+" + # Production: "https://michelangelo\\.example\\.com" + corsOrigins: "" + + resources: {} + + service: + type: ClusterIP + nodePort: null + +# ----------------------------------------------------------------------------- +# ui — React frontend served by nginx. +# ----------------------------------------------------------------------------- +ui: + enabled: true + + # REQUIRED when ui.enabled=true. Browser-reachable URL of the envoy proxy. + # The UI loads this from /config.json at runtime — it must be reachable + # from the user's browser, not just from inside the cluster. + # + # Local k3d: "http://localhost:8081" + # Production: "https://michelangelo.example.com/api" + apiBaseUrl: "" + + resources: + requests: + memory: 128Mi + cpu: 100m + limits: + memory: 256Mi + cpu: 200m + + service: + type: ClusterIP + port: 80 + nodePort: null + +# ----------------------------------------------------------------------------- +# worker — Cadence/Temporal workflow client. Executes pipeline runs. +# ----------------------------------------------------------------------------- +worker: + enabled: true + + # Scale horizontally for higher pipeline-run throughput. Workers are + # stateless; runs are distributed by the workflow engine task list. + replicas: 1 + + # Logging configuration for the worker process. + logging: + level: info + development: true + encoding: console + + resources: {} + +# ----------------------------------------------------------------------------- +# controllermgr — Kubernetes controller manager for Michelangelo CRDs. +# ----------------------------------------------------------------------------- +controllermgr: + enabled: true + + # Namespaces this controller manager watches. + # Empty list = watch all namespaces (uses ClusterRole). + # Non-empty = watch only listed namespaces (uses namespaced Role per entry). 
+ watchNamespace: [] + + # Bind ports for the metrics and health-probe endpoints. + metricsPort: 8091 + healthPort: 8081 + + # Leader election. Off by default to match the source sandbox YAML. + leaderElection: false + leaderElectionID: decaf1259.michelangelo.uber.com + + # Kubernetes API client tuning for the rayCluster controller. + controllers: + rayCluster: + qps: 300 + burst: 600 + + # Metadata storage knobs (ingester). Defaults match the source sandbox YAML. + metadataStorage: + enable: true + deletionDelay: 10s + enableResourceVersionCache: false + + # Per-controller reconciliation concurrency. Override to tune throughput. + ingester: + concurrentReconciles: 2 + requeuePeriod: 30s + concurrentReconcilesMap: + PipelineRun: 10 + Deployment: 3 + Pipeline: 3 + Revision: 3 + + # Workflow client extras (controllermgr-only). + workflowClient: + # Workflow UI execution-URL template. Empty by default; set in values-k3d.yaml for + # the local sandbox or in production to your own Cadence/Temporal UI. + executionUrlFormat: "" + + resources: {} + +# ----------------------------------------------------------------------------- +# ServiceAccount used by every chart-managed Pod. +# ----------------------------------------------------------------------------- +serviceAccount: + # Create a ServiceAccount as part of this release. + create: true + + # Override the generated name. Defaults to the release fullname. + name: "" + + # Annotations applied to the ServiceAccount (e.g. AWS IRSA role ARN). + annotations: {} + +# ----------------------------------------------------------------------------- +# Pod-level security defaults. Applied to every Pod the chart creates. +# ----------------------------------------------------------------------------- +podSecurityContext: + runAsNonRoot: true + runAsUser: 65534 + fsGroup: 65534 + +# Container-level security defaults. Applied to every container.
+securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: false # left writable: apiserver writes ephemeral state to /tmp + +# ----------------------------------------------------------------------------- +# Default resource requests/limits. Override per-service in the blocks above +# for finer control. The `cpu: 1` limit below is one full core (1000m). +# ----------------------------------------------------------------------------- +resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 1 + memory: 1Gi + +# ----------------------------------------------------------------------------- +# Scheduling (nodeSelector, tolerations, affinity). Applied to every Pod. +# ----------------------------------------------------------------------------- +nodeSelector: {} +tolerations: [] +affinity: {} + +# Extra labels and annotations applied to every chart-managed object. +commonLabels: {} +commonAnnotations: {}