diff --git a/jobs/competitive-test.yml b/jobs/competitive-test.yml
index 4f2c6a08f2..f97f937d63 100644
--- a/jobs/competitive-test.yml
+++ b/jobs/competitive-test.yml
@@ -48,6 +48,9 @@ parameters:
 - name: ssh_key_enabled
   type: boolean
   default: true
+- name: skip_publish
+  type: boolean
+  default: false
 
 jobs:
 - job: ${{ parameters.cloud }}
@@ -89,14 +92,15 @@ jobs:
       engine: ${{ parameters.engine }}
       regions: ${{ parameters.regions }}
       engine_input: ${{ parameters.engine_input }}
-  - template: /steps/publish-results.yml
-    parameters:
-      cloud: ${{ parameters.cloud }}
-      topology: ${{ parameters.topology }}
-      engine: ${{ parameters.engine }}
-      regions: ${{ parameters.regions }}
-      engine_input: ${{ parameters.engine_input }}
-      credential_type: ${{ parameters.credential_type }}
+  - ${{ if not(parameters.skip_publish) }}:
+    - template: /steps/publish-results.yml
+      parameters:
+        cloud: ${{ parameters.cloud }}
+        topology: ${{ parameters.topology }}
+        engine: ${{ parameters.engine }}
+        regions: ${{ parameters.regions }}
+        engine_input: ${{ parameters.engine_input }}
+        credential_type: ${{ parameters.credential_type }}
   - template: /steps/cleanup-resources.yml
     parameters:
       cloud: ${{ parameters.cloud }}
diff --git a/modules/python/clusterloader2/clustermesh-scale/config/config.yaml b/modules/python/clusterloader2/clustermesh-scale/config/config.yaml
new file mode 100644
index 0000000000..6eace02220
--- /dev/null
+++ b/modules/python/clusterloader2/clustermesh-scale/config/config.yaml
@@ -0,0 +1,105 @@
+name: clustermesh-scale-test
+
+# Workload: deploy a small fixed number of pods on this cluster (no churn,
+# no traffic). Measurement modules under modules/measurements/ run the actual
+# scale-test instrumentation (cilium agent/operator CPU+memory, kube-apiserver
+# health, mesh-specific PromQL) so each per-cluster JSONL row carries the data
+# needed for cross-cluster comparison in Kusto. The workload is deliberately
+# trivial — fan-out, attribution, and metric coverage are what we're testing
+# in Phase 1; richer workloads land per scenario in Phase 2+.
+
+{{$namespaces := DefaultParam .CL2_NAMESPACES 1}}  # namespace.number and scale-test "namespaces"
+{{$deploymentsPerNamespace := DefaultParam .CL2_DEPLOYMENTS_PER_NAMESPACE 2}}
+{{$replicasPerDeployment := DefaultParam .CL2_REPLICAS_PER_DEPLOYMENT 2}}
+{{$operationTimeout := DefaultParam .CL2_OPERATION_TIMEOUT "15m"}}  # forwarded to scale-test.yaml
+{{$apiServerCallsPerSecond := DefaultParam .CL2_API_SERVER_CALLS_PER_SECOND 5}}  # qps of DeploymentCreateQps
+
+namespace:
+  number: {{$namespaces}}
+  prefix: clustermesh-scale
+  deleteStaleNamespaces: true
+  deleteAutomanagedNamespaces: true
+  enableExistingNamespaces: false
+  deleteNamespaceTimeout: 20m
+
+tuningSets:
+  - name: Sequence  # NOTE(review): no step in this file references it; presumably used inside a module — confirm
+    parallelismLimitedLoad:
+      parallelismLimit: 1
+  - name: DeploymentCreateQps  # passed as tuningSet to the clustermesh and scale-test modules
+    qpsLoad:
+      qps: {{$apiServerCallsPerSecond}}
+
+steps:
+  # ----- Start measurements -----
+  # control-plane.yaml owns PodStartupLatency + APIResponsivenessPrometheus +
+  # apiserver CPU/mem queries; cilium.yaml owns cilium-agent + cilium-operator
+  # CPU/mem; clustermesh-metrics.yaml owns mesh-specific PromQL (remote-cluster
+  # connectivity, kvstore event rate, identity count, etc.). All three are
+  # gathered after the workload is created and before it is deleted (see
+  # "Gather measurements" below), so the window covers creation, not teardown.
+  - module:
+      path: /modules/measurements/control-plane.yaml
+      params:
+        action: start
+        group: clustermesh-scale-test
+
+  - module:
+      path: /modules/measurements/cilium.yaml
+      params:
+        action: start
+
+  - module:
+      path: /modules/measurements/clustermesh-metrics.yaml
+      params:
+        action: start
+
+  - module:
+      path: /modules/clustermesh.yaml
+      params:
+        actionName: create
+        tuningSet: DeploymentCreateQps
+
+  - module:
+      path: /modules/scale-test.yaml
+      params:
+        actionName: create
+        namespaces: {{$namespaces}}
+        deploymentsPerNamespace: {{$deploymentsPerNamespace}}
+        replicasPerDeployment: {{$replicasPerDeployment}}
+        tuningSet: DeploymentCreateQps
+        operationTimeout: {{$operationTimeout}}
+
+  # ----- Gather measurements -----
+  # Mirror the start block above. Order matches network-scale convention.
+  - module:
+      path: /modules/measurements/control-plane.yaml
+      params:
+        action: gather
+        group: clustermesh-scale-test
+
+  - module:
+      path: /modules/measurements/cilium.yaml
+      params:
+        action: gather
+
+  - module:
+      path: /modules/measurements/clustermesh-metrics.yaml
+      params:
+        action: gather
+
+  - module:
+      path: /modules/scale-test.yaml
+      params:
+        actionName: delete
+        namespaces: {{$namespaces}}
+        deploymentsPerNamespace: {{$deploymentsPerNamespace}}
+        replicasPerDeployment: {{$replicasPerDeployment}}
+        tuningSet: DeploymentCreateQps
+        operationTimeout: {{$operationTimeout}}
+
+  - module:
+      path: /modules/clustermesh.yaml
+      params:
+        actionName: delete
+        tuningSet: DeploymentCreateQps
diff --git a/modules/python/clusterloader2/clustermesh-scale/config/event-throughput.yaml b/modules/python/clusterloader2/clustermesh-scale/config/event-throughput.yaml
new file mode 100644
index 0000000000..439fdc4e71
--- /dev/null
+++ b/modules/python/clusterloader2/clustermesh-scale/config/event-throughput.yaml
@@ -0,0 +1,166 @@
+name: clustermesh-event-throughput
+
+# Scale scenario #1: Cross-Cluster Event Throughput.
+#
+# Goal (scale testing.txt line 42-54): determine max sustainable and burst
+# event rates for endpoints, services, and identities propagating across
+# the mesh; measure events/sec processed and time-to-convergence proxy.
+#
+# Sequence (every cluster runs this in parallel; CL2 fan-out lives in
+# steps/engine/.../execute.yml):
+#
+#   1. Start measurements (control-plane, cilium, clustermesh-metrics +
+#      scenario-specific clustermesh-throughput + etcd-metrics).
+#   2. Deploy PodMonitor scraping clustermesh-apiserver.
+#   3. Create N pods + N global Services per cluster at a controlled QPS.
+#   4. Warmup sleep — let initial create-flurry settle into steady state.
+#   5. Burst rolling-restart of every Deployment (closes the "burst"
+#      coverage gap from scale testing.txt line 52).
+#   6. Settle sleep — let kvstore queues drain and propagation latency
+#      histograms accumulate steady-state samples.
+#   7. Gather all measurements.
+#   8. Tear down the workload + PodMonitor.
+
+{{$namespaces := DefaultParam .CL2_NAMESPACES 5}}
+{{$deploymentsPerNamespace := DefaultParam .CL2_DEPLOYMENTS_PER_NAMESPACE 4}}
+{{$replicasPerDeployment := DefaultParam .CL2_REPLICAS_PER_DEPLOYMENT 10}}
+{{$operationTimeout := DefaultParam .CL2_OPERATION_TIMEOUT "20m"}}
+{{$apiServerCallsPerSecond := DefaultParam .CL2_API_SERVER_CALLS_PER_SECOND 20}}
+{{$warmupDuration := DefaultParam .CL2_WARMUP_DURATION "30s"}}
+{{$holdDuration := DefaultParam .CL2_HOLD_DURATION "2m"}}
+{{$restartGeneration := DefaultParam .CL2_RESTART_GENERATION 1}}
+
+namespace:
+  number: {{$namespaces}}
+  prefix: clustermesh-et
+  deleteStaleNamespaces: true
+  deleteAutomanagedNamespaces: true
+  enableExistingNamespaces: false
+  deleteNamespaceTimeout: 20m
+
+tuningSets:
+  - name: Sequence
+    parallelismLimitedLoad:
+      parallelismLimit: 1
+  - name: DeploymentCreateQps
+    qpsLoad:
+      qps: {{$apiServerCallsPerSecond}}
+
+steps:
+  # ----- Start measurements -----
+  - module:
+      path: /modules/measurements/control-plane.yaml
+      params:
+        action: start
+        group: clustermesh-event-throughput
+
+  - module:
+      path: /modules/measurements/cilium.yaml
+      params:
+        action: start
+
+  - module:
+      path: /modules/measurements/clustermesh-metrics.yaml
+      params:
+        action: start
+
+  - module:
+      path: /modules/measurements/clustermesh-throughput.yaml
+      params:
+        action: start
+
+  - module:
+      path: /modules/measurements/etcd-metrics.yaml
+      params:
+        action: start
+
+  - module:
+      path: /modules/clustermesh.yaml
+      params:
+        actionName: create
+        tuningSet: DeploymentCreateQps
+
+  # ----- Workload: create -----
+  - module:
+      path: /modules/event-throughput-workload.yaml
+      params:
+        actionName: create
+        generation: 0
+        namespaces: {{$namespaces}}
+        deploymentsPerNamespace: {{$deploymentsPerNamespace}}
+        replicasPerDeployment: {{$replicasPerDeployment}}
+        tuningSet: DeploymentCreateQps
+        operationTimeout: {{$operationTimeout}}
+
+  # ----- Warmup: let the create-flurry settle into steady state -----
+  - name: Warmup before burst
+    measurements:
+      - Identifier: WarmupSleep
+        Method: Sleep
+        Params:
+          duration: {{$warmupDuration}}
+
+  # ----- Burst: rolling-restart of every Deployment -----
+  - module:
+      path: /modules/event-throughput-workload.yaml
+      params:
+        actionName: restart
+        generation: {{$restartGeneration}}
+        namespaces: {{$namespaces}}
+        deploymentsPerNamespace: {{$deploymentsPerNamespace}}
+        replicasPerDeployment: {{$replicasPerDeployment}}
+        tuningSet: DeploymentCreateQps
+        operationTimeout: {{$operationTimeout}}
+
+  # ----- Settle: let kvstore queues drain post-burst -----
+  - name: Settle after burst
+    measurements:
+      - Identifier: SettleSleep
+        Method: Sleep
+        Params:
+          duration: {{$holdDuration}}
+
+  # ----- Gather measurements -----
+  - module:
+      path: /modules/measurements/control-plane.yaml
+      params:
+        action: gather
+        group: clustermesh-event-throughput
+
+  - module:
+      path: /modules/measurements/cilium.yaml
+      params:
+        action: gather
+
+  - module:
+      path: /modules/measurements/clustermesh-metrics.yaml
+      params:
+        action: gather
+
+  - module:
+      path: /modules/measurements/clustermesh-throughput.yaml
+      params:
+        action: gather
+
+  - module:
+      path: /modules/measurements/etcd-metrics.yaml
+      params:
+        action: gather
+
+  # ----- Workload: delete -----
+  - module:
+      path: /modules/event-throughput-workload.yaml
+      params:
+        actionName: delete
+        generation: {{$restartGeneration}}
+        namespaces: {{$namespaces}}
+        deploymentsPerNamespace: {{$deploymentsPerNamespace}}
+        replicasPerDeployment: {{$replicasPerDeployment}}
+        tuningSet: DeploymentCreateQps
+        operationTimeout: {{$operationTimeout}}
+
+  - module:
+      path: /modules/clustermesh.yaml
+      params:
+        actionName: delete
+        tuningSet: DeploymentCreateQps
diff --git a/modules/python/clusterloader2/clustermesh-scale/config/modules/clustermesh.yaml b/modules/python/clusterloader2/clustermesh-scale/config/modules/clustermesh.yaml
new file mode 100644
index 0000000000..175387b2ae
--- /dev/null
+++ b/modules/python/clusterloader2/clustermesh-scale/config/modules/clustermesh.yaml
@@ -0,0 +1,26 @@
+## ClusterMesh module. Creates (actionName "create") or removes (any other
+## actionName) one clustermesh-apiserver PodMonitor in the "monitoring"
+## namespace, so the CL2-spawned Prometheus picks up at least one mesh-side
+## metric per cluster.
+## Phase 1 exit criteria require this — see plan.md Phase 1 line 318.
+## Params: actionName, tuningSet (default "DeploymentCreateQps"),
+## interval (default "15s", filled into the PodMonitor template as Interval).
+
+{{$scrapeInterval := DefaultParam .interval "15s"}}
+{{$qpsTuningSet := DefaultParam .tuningSet "DeploymentCreateQps"}}
+
+# Desired PodMonitor count: 1 on create, 0 for every other action.
+{{$desiredCount := 0}}
+{{if eq .actionName "create"}}{{$desiredCount = 1}}{{end}}
+
+steps:
+  - name: {{.actionName}} ClusterMesh Pod Monitor
+    phases:
+      - namespaceList: [monitoring]
+        replicasPerNamespace: {{$desiredCount}}
+        tuningSet: {{$qpsTuningSet}}
+        objectBundle:
+          - basename: clustermesh-apiserver
+            objectTemplatePath: modules/clustermesh/podmonitor.yaml
+            templateFillMap:
+              Interval: {{$scrapeInterval}}
diff --git a/modules/python/clusterloader2/clustermesh-scale/config/modules/clustermesh/podmonitor.yaml b/modules/python/clusterloader2/clustermesh-scale/config/modules/clustermesh/podmonitor.yaml
new file mode 100644
index 0000000000..f667f9e94a
--- /dev/null
+++ b/modules/python/clusterloader2/clustermesh-scale/config/modules/clustermesh/podmonitor.yaml
@@ -0,0 +1,35 @@
+apiVersion: monitoring.coreos.com/v1
+kind: PodMonitor
+metadata:
+  name: {{.Name}}  # CL2-supplied object name, as in the other object templates
+  namespace: monitoring
+spec:
+  # Cilium clustermesh-apiserver exposes metrics on port 9963 (apiserver) and
+  # 9964 (kvstoremesh sidecar) when Prometheus integration is enabled. AKS
+  # managed Cilium uses the same upstream defaults. If a future preview
+  # changes these, override via __address__ relabel below.
+  selector:
+    matchLabels:
+      k8s-app: clustermesh-apiserver
+  namespaceSelector:
+    matchNames:
+      - kube-system
+  podMetricsEndpoints:
+    - interval: {{.Interval}}
+      honorLabels: true
+      path: /metrics
+      relabelings:
+        - sourceLabels: [__address__]
+          action: replace
+          targetLabel: __address__
+          regex: '(.+?)(\:\d+)?'  # host, plus an optional :port that is dropped
+          replacement: '$1:9963'  # clustermesh-apiserver metrics port
+    - interval: {{.Interval}}
+      honorLabels: true
+      path: /metrics
+      relabelings:
+        - sourceLabels: [__address__]
+          action: replace
+          targetLabel: __address__
+          regex: '(.+?)(\:\d+)?'  # host, plus an optional :port that is dropped
+          replacement: '$1:9964'  # kvstoremesh sidecar metrics port
diff --git a/modules/python/clusterloader2/clustermesh-scale/config/modules/event-throughput-deployment.yaml b/modules/python/clusterloader2/clustermesh-scale/config/modules/event-throughput-deployment.yaml
new file mode 100644
index 0000000000..06d677b1b0
--- /dev/null
+++ b/modules/python/clusterloader2/clustermesh-scale/config/modules/event-throughput-deployment.yaml
@@ -0,0 +1,42 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{.Name}}
+  labels:
+    group: {{.Group}}  # the workload module's labelSelector filters on this label
+    app: {{.Name}}
+spec:
+  replicas: {{.Replicas}}
+  selector:
+    matchLabels:
+      name: {{.Name}}  # must equal the pod-template "name" label below
+  template:
+    metadata:
+      labels:
+        name: {{.Name}}  # also the selector key in event-throughput-service.yaml
+        group: {{.Group}}
+        app: {{.Name}}
+      annotations:
+        # Bumping RestartGeneration in the pod template forces a rolling
+        # restart on the next CL2 apply — the canonical Kubernetes pattern
+        # for triggering deployment rollouts without changing image. This
+        # drives the burst event flurry for scale-scenario #1.
+        restart-generation: "{{.RestartGeneration}}"
+    spec:
+      containers:
+        - name: pause
+          image: mcr.microsoft.com/oss/kubernetes/pause:3.6
+          # pause:3.6 is the Kubernetes pause container — it literally sleeps
+          # forever and consumes single-digit CPU shares + ~few MB. The
+          # earlier 50m CPU / 50Mi memory limits caused per-node CPU
+          # overcommit (~160% of allocatable on Standard_D4s_v4) at
+          # 100 pods/node, which starves the kubelet+CNI sandbox setup and
+          # leaves a few stragglers stuck Pending → CL2 timeout. Tighter
+          # limits here mirror what real pause-pod e2e fixtures use.
+          resources:
+            requests:
+              cpu: 1m
+              memory: 5Mi
+            limits:
+              cpu: 5m
+              memory: 20Mi
diff --git a/modules/python/clusterloader2/clustermesh-scale/config/modules/event-throughput-service.yaml b/modules/python/clusterloader2/clustermesh-scale/config/modules/event-throughput-service.yaml
new file mode 100644
index 0000000000..7c795f65c3
--- /dev/null
+++ b/modules/python/clusterloader2/clustermesh-scale/config/modules/event-throughput-service.yaml
@@ -0,0 +1,27 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{.Name}}
+  labels:
+    group: {{.Group}}
+    app: {{.Name}}
+  annotations:
+    # Modern annotation (Cilium >= 1.13). The clustermesh-apiserver fans
+    # this service's endpoints out to all peer clusters, exercising the
+    # service-propagation path that scale-scenario #1 measures.
+    service.cilium.io/global: "true"
+    # Legacy annotation (pre-1.13). Applied defensively because the AKS
+    # managed Cilium build version is not yet verified by us. Cilium
+    # ignores annotations it does not understand, so carrying both is safe.
+    io.cilium/global-service: "true"
+spec:
+  selector:
+    name: {{.Name}}  # pod label set by event-throughput-deployment.yaml
+  ports:
+    - port: 80
+      targetPort: 80  # NOTE(review): the pause pods declare no containerPort; presumably no traffic is sent — confirm
+      protocol: TCP
+  # Headless: backends are advertised across the mesh by clustermesh-apiserver
+  # rather than routed through a per-cluster ClusterIP. Reduces noise from
+  # ClusterIP allocation under high churn.
+  clusterIP: None
diff --git a/modules/python/clusterloader2/clustermesh-scale/config/modules/event-throughput-workload.yaml b/modules/python/clusterloader2/clustermesh-scale/config/modules/event-throughput-workload.yaml
new file mode 100644
index 0000000000..0e0a3e36bd
--- /dev/null
+++ b/modules/python/clusterloader2/clustermesh-scale/config/modules/event-throughput-workload.yaml
@@ -0,0 +1,73 @@
+name: clustermesh-event-throughput-workload
+
+# Workload module for scale-scenario #1: Cross-Cluster Event Throughput.
+#
+# Exercises three flavors of cross-cluster events on every cluster in parallel:
+#
+#   create  — bring N pods + N global Services up at a controlled QPS.
+#             Drives endpoint+identity creation events into the local
+#             clustermesh-apiserver, which fans out N*(M-1) writes across
+#             the mesh on every other peer's etcd.
+#   restart — bump a pod-template annotation so the Deployment triggers a
+#             rolling restart. Closes the "burst creation/deletion" gap from
+#             scale testing.txt line 52 — measures peak event-flurry capacity
+#             when an entire cluster's pods churn over within seconds.
+#   delete  — set replicasPerNamespace to 0 to remove every object. Runs after
+#             event-throughput.yaml gathers, so it is outside that window.
+
+{{$actionName := .actionName}}  # create | restart | delete
+{{$generation := DefaultParam .generation 0}}  # becomes the restart-generation pod annotation
+{{$namespaces := .namespaces}}  # upper bound of the namespaceRange below
+{{$deploymentsPerNamespace := .deploymentsPerNamespace}}
+{{$replicasPerDeployment := .replicasPerDeployment}}
+{{$tuningSet := .tuningSet}}
+{{$operationTimeout := .operationTimeout}}
+
+# delete = bring object count to 0; create/restart keep configured count.
+{{$replicasInPhase := $deploymentsPerNamespace}}
+{{if eq $actionName "delete"}}{{$replicasInPhase = 0}}{{end}}
+
+steps:
+  # Per-action WaitForControlledPodsRunning lifecycle: start (registers
+  # watcher with apiVersion+kind so CL2 knows which controllers to track),
+  # then create/restart/delete the workload, then gather. Using a per-action
+  # Identifier keeps the create/restart/delete invocations from clobbering
+  # each other's metric state across the three module calls in
+  # event-throughput.yaml.
+  - name: Start tracking event-throughput pods for {{$actionName}}
+    measurements:
+      - Identifier: WaitForControlledPodsRunning-{{$actionName}}
+        Method: WaitForControlledPodsRunning
+        Params:
+          action: start
+          apiVersion: apps/v1
+          kind: Deployment
+          checkIfPodsAreUpdated: true
+          labelSelector: group = clustermesh-event-throughput
+          operationTimeout: {{$operationTimeout}}
+
+  - name: {{$actionName}} event-throughput workload
+    phases:
+      - namespaceRange:
+          min: 1
+          max: {{$namespaces}}
+        replicasPerNamespace: {{$replicasInPhase}}
+        tuningSet: {{$tuningSet}}
+        objectBundle:
+          - basename: et
+            objectTemplatePath: /modules/event-throughput-deployment.yaml
+            templateFillMap:
+              Replicas: {{$replicasPerDeployment}}
+              Group: clustermesh-event-throughput
+              RestartGeneration: {{$generation}}
+          - basename: et
+            objectTemplatePath: /modules/event-throughput-service.yaml
+            templateFillMap:
+              Group: clustermesh-event-throughput
+
+  - name: Wait for event-throughput pods after {{$actionName}}
+    measurements:
+      - Identifier: WaitForControlledPodsRunning-{{$actionName}}
+        Method: WaitForControlledPodsRunning
+        Params:
+          action: gather
diff --git a/modules/python/clusterloader2/clustermesh-scale/config/modules/measurements/cilium.yaml b/modules/python/clusterloader2/clustermesh-scale/config/modules/measurements/cilium.yaml
new file mode 100644
index 0000000000..4d27607347
--- /dev/null
+++ b/modules/python/clusterloader2/clustermesh-scale/config/modules/measurements/cilium.yaml
@@ -0,0 +1,226 @@
+{{$action := .action}} # "start" or "gather"; passed as Params.action to every measurement below
+
+{{$suffix := DefaultParam .suffix ""}}  # appended to each Identifier and metricName
+
+steps:
+  - name: {{$action}} Additional Cilium Measurements
+    measurements:
+    - Identifier: CiliumAvgCPUUsage{{$suffix}}
+      Method: GenericPrometheusQuery
+      Params:
+        action: {{$action}}
+        metricName: Cilium Average CPU Usage {{$suffix}}
+        metricVersion: v1
+        unit: cpu
+        enableViolations: true
+        queries:
+        - name: Perc99
+          query: quantile(0.99, avg_over_time(rate(cilium_process_cpu_seconds_total[1m])[%v:]))
+        - name: Perc90
+          query: quantile(0.90, avg_over_time(rate(cilium_process_cpu_seconds_total[1m])[%v:]))
+        - name: Perc50
+          query: quantile(0.50, avg_over_time(rate(cilium_process_cpu_seconds_total[1m])[%v:]))
+    - Identifier: CiliumMaxCPUUsage{{$suffix}}
+      Method: GenericPrometheusQuery
+      Params:
+        action: {{$action}}
+        metricName: Cilium Max CPU Usage {{$suffix}}
+        metricVersion: v1
+        unit: cpu
+        enableViolations: true
+        queries:
+        - name: Perc99
+          query: quantile(0.99, max_over_time(rate(cilium_process_cpu_seconds_total[1m])[%v:]))
+        - name: Perc90
+          query: quantile(0.90, max_over_time(rate(cilium_process_cpu_seconds_total[1m])[%v:]))
+        - name: Perc50
+          query: quantile(0.50, max_over_time(rate(cilium_process_cpu_seconds_total[1m])[%v:]))
+    - Identifier: CiliumAvgMemUsage{{$suffix}}
+      Method: GenericPrometheusQuery
+      Params:
+        action: {{$action}}
+        metricName: Cilium Avg Memory Usage {{$suffix}}
+        metricVersion: v1
+        unit: MB
+        enableViolations: true
+        queries:
+        - name: Perc99
+          query: quantile(0.99, avg_over_time(cilium_process_resident_memory_bytes[%v:]) / 1024 / 1024)
+        - name: Perc90
+          query: quantile(0.90, avg_over_time(cilium_process_resident_memory_bytes[%v:]) / 1024 / 1024)
+        - name: Perc50
+          query: quantile(0.5, avg_over_time(cilium_process_resident_memory_bytes[%v:]) / 1024 / 1024)
+    - Identifier: CiliumMaxMemUsage{{$suffix}}
+      Method: GenericPrometheusQuery
+      Params:
+        action: {{$action}}
+        metricName: Cilium Max Memory Usage {{$suffix}}
+        metricVersion: v1
+        unit: MB
+        enableViolations: true
+        queries:
+        - name: Perc99
+          query: quantile(0.99, max_over_time(cilium_process_resident_memory_bytes[%v:]) / 1024 / 1024)
+        - name: Perc90
+          query: quantile(0.90, max_over_time(cilium_process_resident_memory_bytes[%v:]) / 1024 / 1024)
+        - name: Perc50
+          query: quantile(0.5, max_over_time(cilium_process_resident_memory_bytes[%v:]) / 1024 / 1024)
+    - Identifier: CiliumOperatorAvgCPUUsage{{$suffix}}
+      Method: GenericPrometheusQuery
+      Params:
+        action: {{$action}}
+        metricName: Cilium Operator Avg CPU Usage {{$suffix}}
+        metricVersion: v1
+        unit: cpu
+        enableViolations: true
+        queries:
+        - name: Perc99
+          query: quantile(0.99, avg_over_time(rate(cilium_operator_process_cpu_seconds_total[1m])[%v:]))
+        - name: Perc90
+          query: quantile(0.90, avg_over_time(rate(cilium_operator_process_cpu_seconds_total[1m])[%v:]))
+        - name: Perc50
+          query: quantile(0.50, avg_over_time(rate(cilium_operator_process_cpu_seconds_total[1m])[%v:]))
+    - Identifier: CiliumOperatorMaxCPUUsage{{$suffix}}
+      Method: GenericPrometheusQuery
+      Params:
+        action: {{$action}}
+        metricName: Cilium Operator Max CPU Usage {{$suffix}}
+        metricVersion: v1
+        unit: cpu
+        enableViolations: true
+        queries:
+        - name: Perc99
+          query: quantile(0.99, max_over_time(rate(cilium_operator_process_cpu_seconds_total[1m])[%v:]))
+        - name: Perc90
+          query: quantile(0.90, max_over_time(rate(cilium_operator_process_cpu_seconds_total[1m])[%v:]))
+        - name: Perc50
+          query: quantile(0.50, max_over_time(rate(cilium_operator_process_cpu_seconds_total[1m])[%v:]))
+    - Identifier: CiliumOperatorMaxMemUsage{{$suffix}}
+      Method: GenericPrometheusQuery
+      Params:
+        action: {{$action}}
+        metricName: Cilium Operator Max Memory Usage {{$suffix}}
+        metricVersion: v1
+        unit: MB
+        enableViolations: true
+        queries:
+        - name: Perc99
+          query: quantile(0.99, max_over_time(cilium_operator_process_resident_memory_bytes[%v:]) / 1024 / 1024)
+        - name: Perc90
+          query: quantile(0.90, max_over_time(cilium_operator_process_resident_memory_bytes[%v:]) / 1024 / 1024)
+        - name: Perc50
+          query: quantile(0.5, max_over_time(cilium_operator_process_resident_memory_bytes[%v:]) / 1024 / 1024)
+    - Identifier: CiliumOperatorAvgMemUsage{{$suffix}}
+      Method: GenericPrometheusQuery
+      Params:
+        action: {{$action}}
+        metricName: Cilium Operator Avg Memory Usage {{$suffix}}
+        metricVersion: v1
+        unit: MB
+        enableViolations: true
+        queries:
+        - name: Perc99
+          query: quantile(0.99, avg_over_time(cilium_operator_process_resident_memory_bytes[%v:]) / 1024 / 1024)
+        - name: Perc90
+          query: quantile(0.90, avg_over_time(cilium_operator_process_resident_memory_bytes[%v:]) / 1024 / 1024)
+        - name: Perc50
+          query: quantile(0.5, avg_over_time(cilium_operator_process_resident_memory_bytes[%v:]) / 1024 / 1024)
+    - Identifier: CiliumContainerFsAvgWrittenBytes{{$suffix}}
+      Method: GenericPrometheusQuery
+      Params:
+        action: {{$action}}
+        metricName: Cilium Container FS Average Written Bytes {{$suffix}}
+        metricVersion: v1
+        unit: bytes/s
+        enableViolations: true
+        queries:
+        - name: Perc99
+          query: quantile(0.99, avg_over_time(rate(container_fs_writes_bytes_total{container="cilium-agent"}[1m])[%v:]))
+        - name: Perc90
+          query: quantile(0.90, avg_over_time(rate(container_fs_writes_bytes_total{container="cilium-agent"}[1m])[%v:]))
+        - name: Perc50
+          query: quantile(0.50, avg_over_time(rate(container_fs_writes_bytes_total{container="cilium-agent"}[1m])[%v:]))
+    - Identifier: CiliumContainerFsMaxWrittenBytes{{$suffix}}
+      Method: GenericPrometheusQuery
+      Params:
+        action: {{$action}}
+        metricName: Cilium Container FS Max Written Bytes {{$suffix}}
+        metricVersion: v1
+        unit: bytes/s
+        enableViolations: true
+        queries:
+        - name: Perc99
+          query: quantile(0.99, max_over_time(rate(container_fs_writes_bytes_total{container="cilium-agent"}[1m])[%v:]))
+        - name: Perc90
+          query: quantile(0.90, max_over_time(rate(container_fs_writes_bytes_total{container="cilium-agent"}[1m])[%v:]))
+        - name: Perc50
+          query: quantile(0.50, max_over_time(rate(container_fs_writes_bytes_total{container="cilium-agent"}[1m])[%v:]))
+    # NOTE: FS write latency (avg/max) was intentionally dropped from this
+    # scenario. The query (rate(container_fs_write_seconds_total) / rate(
+    # container_fs_writes_total) for container="cilium-agent") returns no
+    # samples here because cilium-agent in the clustermesh scenario does
+    # almost all I/O via in-kernel bpf maps, not container fs — the write-op
+    # rate is genuinely ~0, so the division yields no result. Written-bytes
+    # rates (above) still produce useful data and remain the FS signal.
+
+    # ---------------------------------------------------------------------
+    # Network usage (spec line 38, 134: "CPU/memory/network per
+    # component"). cAdvisor exposes container_network_*_bytes_total per
+    # pod. We pin to pod="cilium-.*" instead of container="cilium-agent"
+    # because cAdvisor reports network counters at the pod-sandbox level
+    # (container="POD"), not the per-container level — so a
+    # container="cilium-agent" filter would return empty.
+    # ---------------------------------------------------------------------
+    - Identifier: CiliumContainerNetworkTransmitBytes{{$suffix}}
+      Method: GenericPrometheusQuery
+      Params:
+        action: {{$action}}
+        metricName: Cilium Network Transmit Bytes {{$suffix}}
+        metricVersion: v1
+        unit: bytes/s
+        enableViolations: false
+        queries:
+        - name: Perc99
+          query: quantile(0.99, max_over_time(rate(container_network_transmit_bytes_total{pod=~"cilium-.*",namespace="kube-system"}[1m])[%v:]))
+        - name: Perc50
+          query: quantile(0.50, avg_over_time(rate(container_network_transmit_bytes_total{pod=~"cilium-.*",namespace="kube-system"}[1m])[%v:]))
+    - Identifier: CiliumContainerNetworkReceiveBytes{{$suffix}}
+      Method: GenericPrometheusQuery
+      Params:
+        action: {{$action}}
+        metricName: Cilium Network Receive Bytes {{$suffix}}
+        metricVersion: v1
+        unit: bytes/s
+        enableViolations: false
+        queries:
+        - name: Perc99
+          query: quantile(0.99, max_over_time(rate(container_network_receive_bytes_total{pod=~"cilium-.*",namespace="kube-system"}[1m])[%v:]))
+        - name: Perc50
+          query: quantile(0.50, avg_over_time(rate(container_network_receive_bytes_total{pod=~"cilium-.*",namespace="kube-system"}[1m])[%v:]))
+
+    - Identifier: CiliumContainerRestarts{{$suffix}}
+      Method: GenericPrometheusQuery
+      Params:
+        action: {{$action}}
+        metricName: Cilium Container Restarts {{$suffix}}
+        metricVersion: v1
+        unit: "#"
+        enableViolations: true
+        queries:
+        - name: Perc99
+          query: quantile(0.99, max_over_time(increase(kube_pod_container_status_restarts_total{container="cilium-agent"}[%v])[%v:]))
+        - name: Perc90
+          query: quantile(0.90, max_over_time(increase(kube_pod_container_status_restarts_total{container="cilium-agent"}[%v])[%v:]))
+        - name: Perc50
+          query: quantile(0.50, max_over_time(increase(kube_pod_container_status_restarts_total{container="cilium-agent"}[%v])[%v:]))
+    # - Identifier: AvgCiliumHubbleMetricsCardinality{{$suffix}}
+    #   Method: GenericPrometheusQuery
+    #   Params:
+    #     action: {{$action}}
+    #     metricName: Average Cilium Hubble Metrics Cardinality {{$suffix}}
+    #     metricVersion: v1
+    #     unit: "#"
+    #     enableViolations: true
+    #     queries:
+    #     - name: Avg
+    #       query: count({__name__=~"hubble_.*"})
diff --git a/modules/python/clusterloader2/clustermesh-scale/config/modules/measurements/clustermesh-metrics.yaml b/modules/python/clusterloader2/clustermesh-scale/config/modules/measurements/clustermesh-metrics.yaml
new file mode 100644
index 0000000000..18d0a2a85c
--- /dev/null
+++ b/modules/python/clusterloader2/clustermesh-scale/config/modules/measurements/clustermesh-metrics.yaml
@@ -0,0 +1,192 @@
+{{$action := .action}} # start, gather
+
+{{$suffix := DefaultParam .suffix ""}}
+
+# ClusterMesh-specific Prometheus measurements.
+#
+# All metrics here are upstream Cilium clustermesh-apiserver / cilium-agent
+# metrics, scraped via the PodMonitor deployed by config/modules/clustermesh.yaml.
+# If AKS managed Cilium does not expose a given metric, GenericPrometheusQuery
+# returns empty data items (CL2 logs a warning, the run continues) — refine
+# query strings once we have a live mesh to inspect.
+
+steps:
+  - name: {{$action}} ClusterMesh Measurements
+    measurements:
+    # ---------------------------------------------------------------------
+    # Mesh health: how many remote clusters are connected from this cluster's
+    # perspective. In an N-cluster mesh, this gauge should reach (N-1) on every
+    # cluster. Capturing percentile shape across the run window flags drops.
+    # ---------------------------------------------------------------------
+    - Identifier: ClusterMeshRemoteClustersConnected{{$suffix}}
+      Method: GenericPrometheusQuery
+      Params:
+        action: {{$action}}
+        metricName: ClusterMesh Remote Clusters Connected {{$suffix}}
+        metricVersion: v1
+        unit: "#"
+        enableViolations: false
+        queries:
+        - name: Perc99
+          query: quantile(0.99, avg_over_time(cilium_clustermesh_remote_clusters[%v:]))
+        - name: Perc50
+          query: quantile(0.50, avg_over_time(cilium_clustermesh_remote_clusters[%v:]))
+        - name: Min
+          query: min_over_time(min(cilium_clustermesh_remote_clusters)[%v:])
+
+    # ---------------------------------------------------------------------
+    # Mesh failure counter: cumulative remote-cluster connection failures.
+    # Healthy runs keep the increase at 0. The delta (max - min over the run)
+    # is taken per series, then max()-ed, so one flapping link is not masked.
+    # ---------------------------------------------------------------------
+    - Identifier: ClusterMeshRemoteClusterFailures{{$suffix}}
+      Method: GenericPrometheusQuery
+      Params:
+        action: {{$action}}
+        metricName: ClusterMesh Remote Cluster Failures {{$suffix}}
+        metricVersion: v1
+        unit: "#"
+        enableViolations: false
+        queries:
+        - name: MaxIncrease
+          query: max(max_over_time(cilium_clustermesh_remote_cluster_failures[%v:]) - min_over_time(cilium_clustermesh_remote_cluster_failures[%v:]))
+
+    # ---------------------------------------------------------------------
+    # Cross-cluster event throughput — the headline metric for scale scenario
+    # #1 (Cross-Cluster Event Throughput) and #2 (Pod Churn). Rate of kvstore
+    # events queued per second, summed over all series; percentiles over time.
+    # ---------------------------------------------------------------------
+    - Identifier: ClusterMeshKvstoreEventsRate{{$suffix}}
+      Method: GenericPrometheusQuery
+      Params:
+        action: {{$action}}
+        metricName: ClusterMesh Kvstore Events Rate {{$suffix}}
+        metricVersion: v1
+        unit: events/s
+        enableViolations: false
+        queries:
+        - name: Perc99
+          query: quantile_over_time(0.99, sum(rate(cilium_kvstoremesh_kvstore_events_queue_seconds_count[1m]))[%v:])
+        - name: Perc90
+          query: quantile_over_time(0.90, sum(rate(cilium_kvstoremesh_kvstore_events_queue_seconds_count[1m]))[%v:])
+        - name: Perc50
+          query: quantile_over_time(0.50, sum(rate(cilium_kvstoremesh_kvstore_events_queue_seconds_count[1m]))[%v:])
+
+    # ---------------------------------------------------------------------
+    # Per-type event rate breakdown (spec line 131: "Event rate (per type)"),
+    # split by the `scope` label into the families spec line 5 calls out:
+    # endpoints, services, identities. sum() collapses each family to one
+    # series, so percentiles are taken over time with quantile_over_time
+    # (quantile() across a single series is a no-op). Cilium 1.18 scopes:
+    #   identities/v1     — security identities
+    #   services/v1       — global Service objects
+    #   ip/v1             — endpoint IP-to-identity mappings (endpoints)
+    #   nodes/v1          — node tunnel / IPAM advertisements
+    #   serviceexports/v1 — MCS-API ServiceExport objects
+    #   lease             — leader election
+    #   cilium/.heartbeat — kvstore liveness heartbeat
+    #   cilium/syncedcanaries — initial-sync barrier markers
+    # ---------------------------------------------------------------------
+    - Identifier: ClusterMeshKvstoreEventsRateIdentities{{$suffix}}
+      Method: GenericPrometheusQuery
+      Params:
+        action: {{$action}}
+        metricName: ClusterMesh Kvstore Events Rate Identities {{$suffix}}
+        metricVersion: v1
+        unit: events/s
+        enableViolations: false
+        queries:
+        - name: Perc99
+          query: quantile_over_time(0.99, sum(rate(cilium_kvstoremesh_kvstore_events_queue_seconds_count{scope="identities/v1"}[1m]))[%v:])
+        - name: Perc50
+          query: quantile_over_time(0.50, sum(rate(cilium_kvstoremesh_kvstore_events_queue_seconds_count{scope="identities/v1"}[1m]))[%v:])
+    - Identifier: ClusterMeshKvstoreEventsRateServices{{$suffix}}
+      Method: GenericPrometheusQuery
+      Params:
+        action: {{$action}}
+        metricName: ClusterMesh Kvstore Events Rate Services {{$suffix}}
+        metricVersion: v1
+        unit: events/s
+        enableViolations: false
+        queries:
+        - name: Perc99
+          query: quantile_over_time(0.99, sum(rate(cilium_kvstoremesh_kvstore_events_queue_seconds_count{scope="services/v1"}[1m]))[%v:])
+        - name: Perc50
+          query: quantile_over_time(0.50, sum(rate(cilium_kvstoremesh_kvstore_events_queue_seconds_count{scope="services/v1"}[1m]))[%v:])
+    - Identifier: ClusterMeshKvstoreEventsRateEndpoints{{$suffix}}
+      Method: GenericPrometheusQuery
+      Params:
+        action: {{$action}}
+        metricName: ClusterMesh Kvstore Events Rate Endpoints {{$suffix}}
+        metricVersion: v1
+        unit: events/s
+        enableViolations: false
+        queries:
+        - name: Perc99
+          query: quantile_over_time(0.99, sum(rate(cilium_kvstoremesh_kvstore_events_queue_seconds_count{scope="ip/v1"}[1m]))[%v:])
+        - name: Perc50
+          query: quantile_over_time(0.50, sum(rate(cilium_kvstoremesh_kvstore_events_queue_seconds_count{scope="ip/v1"}[1m]))[%v:])
+
+    # ---------------------------------------------------------------------
+    # Cross-cluster propagation latency proxy: kvstore operation duration
+    # percentiles, with rate() spanning the whole measurement window rather
+    # than only the last minute before gather. Closest upstream metric to "how
+    # long until a change in cluster A is visible in cluster B" without probes.
+    # ---------------------------------------------------------------------
+    - Identifier: ClusterMeshKvstoreOperationDuration{{$suffix}}
+      Method: GenericPrometheusQuery
+      Params:
+        action: {{$action}}
+        metricName: ClusterMesh Kvstore Operation Duration {{$suffix}}
+        metricVersion: v1
+        unit: s
+        enableViolations: false
+        queries:
+        - name: Perc99
+          query: histogram_quantile(0.99, sum(rate(cilium_kvstoremesh_kvstore_operations_duration_seconds_bucket[%v])) by (le))
+        - name: Perc90
+          query: histogram_quantile(0.90, sum(rate(cilium_kvstoremesh_kvstore_operations_duration_seconds_bucket[%v])) by (le))
+        - name: Perc50
+          query: histogram_quantile(0.50, sum(rate(cilium_kvstoremesh_kvstore_operations_duration_seconds_bucket[%v])) by (le))
+
+    # ---------------------------------------------------------------------
+    # Watch queue depth (saturation signal — spec line 37 "Key signals:
+    # ... Watch queue depth"). cilium_kvstoremesh_kvstore_sync_queue_size
+    # is a gauge: number of items currently waiting to be processed by
+    # the kvstoremesh sync loop. A persistently positive or growing value
+    # is the saturation indicator (event ingest > drain rate).
+    # ---------------------------------------------------------------------
+    - Identifier: ClusterMeshKvstoreSyncQueueSize{{$suffix}}
+      Method: GenericPrometheusQuery
+      Params:
+        action: {{$action}}
+        metricName: ClusterMesh Kvstore Sync Queue Size {{$suffix}}
+        metricVersion: v1
+        unit: "#"
+        enableViolations: false
+        queries:
+        - name: Max
+          query: max(max_over_time(cilium_kvstoremesh_kvstore_sync_queue_size[%v:]))
+        - name: Perc99
+          query: quantile(0.99, max_over_time(cilium_kvstoremesh_kvstore_sync_queue_size[%v:]))
+        - name: Perc50
+          query: quantile(0.50, avg_over_time(cilium_kvstoremesh_kvstore_sync_queue_size[%v:]))
+
+    # ---------------------------------------------------------------------
+    # Identity propagation: cilium identity count. Under cross-cluster pod
+    # churn (scenarios #1, #2, #3), this should track the global identity
+    # set converging across clusters. Divergence flags propagation lag.
+    # ---------------------------------------------------------------------
+    - Identifier: ClusterMeshIdentityCount{{$suffix}}
+      Method: GenericPrometheusQuery
+      Params:
+        action: {{$action}}
+        metricName: ClusterMesh Identity Count {{$suffix}}
+        metricVersion: v1
+        unit: "#"
+        enableViolations: false
+        queries:
+        - name: Perc99
+          query: quantile(0.99, max_over_time(cilium_identity[%v:]))
+        - name: Perc50
+          query: quantile(0.50, avg_over_time(cilium_identity[%v:]))
diff --git a/modules/python/clusterloader2/clustermesh-scale/config/modules/measurements/clustermesh-throughput.yaml b/modules/python/clusterloader2/clustermesh-scale/config/modules/measurements/clustermesh-throughput.yaml
new file mode 100644
index 0000000000..c0dd5f92c6
--- /dev/null
+++ b/modules/python/clusterloader2/clustermesh-scale/config/modules/measurements/clustermesh-throughput.yaml
@@ -0,0 +1,78 @@
+{{$action := .action}} # start, gather
+
+{{$suffix := DefaultParam .suffix ""}}
+
+# Scenario #1 (Cross-Cluster Event Throughput) — extra measurements layered
+# on top of the always-on clustermesh-metrics.yaml. These are specifically
+# tuned to the event-throughput workload's create/restart/delete sequence,
+# and are scoped to this scenario because they only make sense when the
+# workload is actively churning kvstore writes.
+
+steps:
+  - name: {{$action}} ClusterMesh Event Throughput Measurements
+    measurements:
+    # ---------------------------------------------------------------------
+    # Backlog proxy: rate of kvstore events queued minus rate of sync errors.
+    # NOTE(review): sync_errors_total counts failures, not drained events, so
+    # with zero errors this equals the raw event rate rather than a queued-vs-
+    # drained backlog; confirm the intended drain-side metric. Spec reference:
+    # scale testing.txt line 14 ("upper bounds — effective QPS limit").
+    # ---------------------------------------------------------------------
+    - Identifier: ClusterMeshEventBacklog{{$suffix}}
+      Method: GenericPrometheusQuery
+      Params:
+        action: {{$action}}
+        metricName: ClusterMesh Event Backlog Rate {{$suffix}}
+        metricVersion: v1
+        unit: events/s
+        enableViolations: false
+        queries:
+        # Wrap each side in sum() to drop labels — the two metrics carry
+        # non-identical label sets (e.g. sync_errors_total has a per-cluster
+        # `source_cluster` label that events_queue_seconds_count doesn't), and
+        # PromQL's binary `-` returns an empty vector when label sets don't
+        # align. The result is a single series, so Perc99 is taken over time
+        # with quantile_over_time (quantile() across one series is a no-op).
+        - name: Perc99
+          query: quantile_over_time(0.99, (sum(rate(cilium_kvstoremesh_kvstore_events_queue_seconds_count[1m])) - sum(rate(cilium_kvstoremesh_kvstore_sync_errors_total[1m])))[%v:])
+        - name: MaxBurst
+          query: max(max_over_time(rate(cilium_kvstoremesh_kvstore_events_queue_seconds_count[30s])[%v:]))
+
+    # ---------------------------------------------------------------------
+    # Global services gauge: one row per cluster of how many global services
+    # this cluster's clustermesh-apiserver has accepted. With the workload
+    # creating N global Services per cluster across M clusters, every cluster
+    # should observe roughly N*M global services. Divergence flags either
+    # scrape failures or service-propagation lag.
+    # ---------------------------------------------------------------------
+    - Identifier: ClusterMeshGlobalServices{{$suffix}}
+      Method: GenericPrometheusQuery
+      Params:
+        action: {{$action}}
+        metricName: ClusterMesh Global Services {{$suffix}}
+        metricVersion: v1
+        unit: "#"
+        enableViolations: false
+        queries:
+        - name: Max
+          query: max(max_over_time(cilium_clustermesh_global_services[%v:]))
+        - name: Perc50
+          query: quantile(0.50, avg_over_time(cilium_clustermesh_global_services[%v:]))
+
+    # ---------------------------------------------------------------------
+    # Explicit p95 split for kvstore operation latency. clustermesh-metrics.yaml
+    # already emits p50/p90/p99; for scenario #1 we also surface p95 so the
+    # scaling-curve dashboard has a smoother percentile gradient. The rate()
+    # range spans the whole measurement window, not only its final minute.
+    # ---------------------------------------------------------------------
+    - Identifier: ClusterMeshKvstoreOperationDurationP95{{$suffix}}
+      Method: GenericPrometheusQuery
+      Params:
+        action: {{$action}}
+        metricName: ClusterMesh Kvstore Operation Duration P95 {{$suffix}}
+        metricVersion: v1
+        unit: s
+        enableViolations: false
+        queries:
+        - name: Perc95
+          query: histogram_quantile(0.95, sum(rate(cilium_kvstoremesh_kvstore_operations_duration_seconds_bucket[%v])) by (le))
diff --git a/modules/python/clusterloader2/clustermesh-scale/config/modules/measurements/control-plane.yaml b/modules/python/clusterloader2/clustermesh-scale/config/modules/measurements/control-plane.yaml
new file mode 100644
index 0000000000..47504cbf89
--- /dev/null
+++ b/modules/python/clusterloader2/clustermesh-scale/config/modules/measurements/control-plane.yaml
@@ -0,0 +1,86 @@
+{{$action := .action}} # start, gather
+
+# Feature gates. NOTE(review): the last three variables below are not referenced anywhere in this module.
+{{$podStartupLatencyThreshold := DefaultParam .CL2_POD_STARTUP_LATENCY_THRESHOLD "15s"}}
+{{$ENABLE_VIOLATIONS_FOR_API_CALL_PROMETHEUS_SIMPLE := DefaultParam .CL2_ENABLE_VIOLATIONS_FOR_API_CALL_PROMETHEUS_SIMPLE true}}
+{{$PROMETHEUS_SCRAPE_KUBE_PROXY := DefaultParam .PROMETHEUS_SCRAPE_KUBE_PROXY true}}
+{{$NETWORK_LATENCY_THRESHOLD := DefaultParam .CL2_NETWORK_LATENCY_THRESHOLD "0s"}}
+{{$ENABLE_IN_CLUSTER_NETWORK_LATENCY := DefaultParam .CL2_ENABLE_IN_CLUSTER_NETWORK_LATENCY true}}
+
+{{$suffix := DefaultParam .suffix ""}}
+
+steps:
+  - name: {{$action}} Additional Measurements
+    measurements:
+      - Identifier: APIResponsivenessPrometheus{{$suffix}}
+        Method: APIResponsivenessPrometheus
+        Params:
+          action: {{$action}}
+          enableViolations: {{$ENABLE_VIOLATIONS_FOR_API_CALL_PROMETHEUS_SIMPLE}}
+          useSimpleLatencyQuery: true
+      - Identifier: PodStartupLatency{{$suffix}}
+        Method: PodStartupLatency
+        Params:
+          action: {{$action}}
+          labelSelector: group = {{.group}}
+          threshold: {{$podStartupLatencyThreshold}}
+      - Identifier: ApiserverAvgCPUUsage{{$suffix}}
+        Method: GenericPrometheusQuery
+        Params:
+          action: {{$action}}
+          metricName: Apiserver Average CPU Usage {{$suffix}}
+          metricVersion: v1
+          unit: cpu
+          enableViolations: true
+          queries:
+          - name: Perc99
+            query: quantile(0.99, avg_over_time(rate(process_cpu_seconds_total{endpoint="apiserver"}[1m])[%v:]))
+          - name: Perc90
+            query: quantile(0.90, avg_over_time(rate(process_cpu_seconds_total{endpoint="apiserver"}[1m])[%v:]))
+          - name: Perc50
+            query: quantile(0.50, avg_over_time(rate(process_cpu_seconds_total{endpoint="apiserver"}[1m])[%v:]))
+      - Identifier: ApiserverMaxCPUUsage{{$suffix}}
+        Method: GenericPrometheusQuery
+        Params:
+          action: {{$action}}
+          metricName: Apiserver Max CPU Usage {{$suffix}}
+          metricVersion: v1
+          unit: cpu
+          enableViolations: true
+          queries:
+          - name: Perc99
+            query: quantile(0.99, max_over_time(rate(process_cpu_seconds_total{endpoint="apiserver"}[1m])[%v:]))
+          - name: Perc90
+            query: quantile(0.90, max_over_time(rate(process_cpu_seconds_total{endpoint="apiserver"}[1m])[%v:]))
+          - name: Perc50
+            query: quantile(0.50, max_over_time(rate(process_cpu_seconds_total{endpoint="apiserver"}[1m])[%v:]))
+      - Identifier: ApiserverAvgMemUsage{{$suffix}}
+        Method: GenericPrometheusQuery
+        Params:
+          action: {{$action}}
+          metricName: Apiserver Average Memory Usage {{$suffix}}
+          metricVersion: v1
+          unit: MB
+          enableViolations: true
+          queries:
+          - name: Perc99
+            query: quantile(0.99, avg_over_time(process_resident_memory_bytes{endpoint="apiserver"}[%v:]) / 1024 / 1024)
+          - name: Perc90
+            query: quantile(0.90, avg_over_time(process_resident_memory_bytes{endpoint="apiserver"}[%v:]) / 1024 / 1024)
+          - name: Perc50
+            query: quantile(0.5, avg_over_time(process_resident_memory_bytes{endpoint="apiserver"}[%v:]) / 1024 / 1024)
+      - Identifier: ApiserverMaxMemUsage{{$suffix}}
+        Method: GenericPrometheusQuery
+        Params:
+          action: {{$action}}
+          metricName: Apiserver Max Memory Usage {{$suffix}}
+          metricVersion: v1
+          unit: MB
+          enableViolations: true
+          queries:
+          - name: Perc99
+            query: quantile(0.99, max_over_time(process_resident_memory_bytes{endpoint="apiserver"}[%v:]) / 1024 / 1024)
+          - name: Perc90
+            query: quantile(0.90, max_over_time(process_resident_memory_bytes{endpoint="apiserver"}[%v:]) / 1024 / 1024)
+          - name: Perc50
+            query: quantile(0.5, max_over_time(process_resident_memory_bytes{endpoint="apiserver"}[%v:]) / 1024 / 1024)
diff --git a/modules/python/clusterloader2/clustermesh-scale/config/modules/measurements/etcd-metrics.yaml b/modules/python/clusterloader2/clustermesh-scale/config/modules/measurements/etcd-metrics.yaml
new file mode 100644
index 0000000000..129891204d
--- /dev/null
+++ b/modules/python/clusterloader2/clustermesh-scale/config/modules/measurements/etcd-metrics.yaml
@@ -0,0 +1,158 @@
+{{$action := .action}} # start, gather
+
+{{$suffix := DefaultParam .suffix ""}}
+
+# Etcd-internal measurements for the embedded etcd inside each cluster's
+# clustermesh-apiserver pod.
+#
+# Spec coverage (scale testing.txt):
+#   - line  34: "Metrics: Cilium, clustermesh-apiserver, etcd"
+#   - line 134: "etcd metrics (watch count, compactions, latency)"
+#
+# Source: the etcd container in the clustermesh-apiserver pod is launched
+# with `--listen-metrics-urls=http://0.0.0.0:9963` and `--metrics=basic`.
+# Our PodMonitor (modules/clustermesh/podmonitor.yaml, port 9963 endpoint)
+# already scrapes that target — we just hadn't been querying the metrics.
+#
+# `--metrics=basic` only emits the etcd_debugging_* family (despite the
+# name, these ARE the basic-tier metrics; the "extensive" tier adds
+# etcd_disk_wal_fsync_*, etcd_network_peer_*, etcd_mvcc_db_total_size_in_bytes,
+# etc., which AKS-managed Cilium does not enable). Queries below pick the
+# best basic-tier proxies for each spec-required signal.
+
+steps:
+  - name: {{$action}} ClusterMesh Etcd Measurements
+    measurements:
+    # ---------------------------------------------------------------------
+    # Watch count (spec line 134 "watch count"). Total watchers currently
+    # registered against this cluster's clustermesh-apiserver etcd. Each
+    # remote cluster's kvstoremesh maintains watchers for endpoints,
+    # services, and identities, so this scales with mesh size and traffic.
+    # Slow-watcher count is the back-pressure signal: a non-zero value
+    # means watchers can't keep up with the event stream.
+    # ---------------------------------------------------------------------
+    - Identifier: ClusterMeshEtcdWatchCount{{$suffix}}
+      Method: GenericPrometheusQuery
+      Params:
+        action: {{$action}}
+        metricName: ClusterMesh Etcd Watch Count {{$suffix}}
+        metricVersion: v1
+        unit: "#"
+        enableViolations: false
+        queries:
+        - name: Max
+          query: max(max_over_time(etcd_debugging_mvcc_watcher_total[%v:]))
+        - name: Perc99
+          query: quantile(0.99, max_over_time(etcd_debugging_mvcc_watcher_total[%v:]))
+        - name: Perc50
+          query: quantile(0.50, avg_over_time(etcd_debugging_mvcc_watcher_total[%v:]))
+
+    - Identifier: ClusterMeshEtcdSlowWatchers{{$suffix}}
+      Method: GenericPrometheusQuery
+      Params:
+        action: {{$action}}
+        metricName: ClusterMesh Etcd Slow Watchers {{$suffix}}
+        metricVersion: v1
+        unit: "#"
+        enableViolations: false
+        queries:
+        - name: Max
+          query: max(max_over_time(etcd_debugging_mvcc_slow_watcher_total[%v:]))
+        - name: Perc99
+          query: quantile(0.99, max_over_time(etcd_debugging_mvcc_slow_watcher_total[%v:]))
+
+    # ---------------------------------------------------------------------
+    # Pending events: events queued for delivery to watchers but not yet
+    # consumed. A growing value over the run window is the etcd-side
+    # equivalent of the kvstoremesh sync queue depth — back-pressure from
+    # the consumer side.
+    # ---------------------------------------------------------------------
+    - Identifier: ClusterMeshEtcdPendingEvents{{$suffix}}
+      Method: GenericPrometheusQuery
+      Params:
+        action: {{$action}}
+        metricName: ClusterMesh Etcd Pending Events {{$suffix}}
+        metricVersion: v1
+        unit: "#"
+        enableViolations: false
+        queries:
+        - name: Max
+          query: max(max_over_time(etcd_debugging_mvcc_pending_events_total[%v:]))
+        - name: Perc99
+          query: quantile(0.99, max_over_time(etcd_debugging_mvcc_pending_events_total[%v:]))
+
+    # ---------------------------------------------------------------------
+    # Compactions (spec line 134 "compactions"). Auto-compaction is enabled
+    # with `--auto-compaction-retention=1` (1-hour retention). Two signals:
+    # compaction latency (histogram) and keys removed over the run, computed
+    # with increase() per series so counter resets and replicas don't skew it.
+    # ---------------------------------------------------------------------
+    - Identifier: ClusterMeshEtcdCompactionDuration{{$suffix}}
+      Method: GenericPrometheusQuery
+      Params:
+        action: {{$action}}
+        metricName: ClusterMesh Etcd Compaction Duration {{$suffix}}
+        metricVersion: v1
+        unit: ms
+        enableViolations: false
+        queries:
+        - name: Perc99
+          query: histogram_quantile(0.99, sum(rate(etcd_debugging_mvcc_db_compaction_total_duration_milliseconds_bucket[%v])) by (le))
+        - name: Perc50
+          query: histogram_quantile(0.50, sum(rate(etcd_debugging_mvcc_db_compaction_total_duration_milliseconds_bucket[%v])) by (le))
+
+    - Identifier: ClusterMeshEtcdCompactionKeys{{$suffix}}
+      Method: GenericPrometheusQuery
+      Params:
+        action: {{$action}}
+        metricName: ClusterMesh Etcd Compacted Keys {{$suffix}}
+        metricVersion: v1
+        unit: "#"
+        enableViolations: false
+        queries:
+        - name: TotalIncrease
+          query: sum(increase(etcd_debugging_mvcc_db_compaction_keys_total[%v]))
+
+    # ---------------------------------------------------------------------
+    # Disk-write latency (spec line 134 "latency"). With --metrics=basic
+    # we don't have etcd_disk_wal_fsync_duration_seconds; the closest
+    # available proxy is etcd_debugging_disk_backend_commit_write_duration
+    # (how long it takes to commit a write txn to the bbolt backend).
+    # rate() spans the whole measurement window so the quantiles describe
+    # the entire run under load, not only the final minute before gather.
+    # ---------------------------------------------------------------------
+    - Identifier: ClusterMeshEtcdBackendWriteDuration{{$suffix}}
+      Method: GenericPrometheusQuery
+      Params:
+        action: {{$action}}
+        metricName: ClusterMesh Etcd Backend Write Duration {{$suffix}}
+        metricVersion: v1
+        unit: s
+        enableViolations: false
+        queries:
+        - name: Perc99
+          query: histogram_quantile(0.99, sum(rate(etcd_debugging_disk_backend_commit_write_duration_seconds_bucket[%v])) by (le))
+        - name: Perc90
+          query: histogram_quantile(0.90, sum(rate(etcd_debugging_disk_backend_commit_write_duration_seconds_bucket[%v])) by (le))
+        - name: Perc50
+          query: histogram_quantile(0.50, sum(rate(etcd_debugging_disk_backend_commit_write_duration_seconds_bucket[%v])) by (le))
+
+    # ---------------------------------------------------------------------
+    # MVCC store size proxy. With --metrics=basic we don't get
+    # etcd_mvcc_db_total_size_in_bytes; etcd_debugging_mvcc_keys_total is
+    # the key count and etcd_debugging_mvcc_total_put_size_in_bytes is the
+    # cumulative bytes written. Together they bound the working set.
+    # ---------------------------------------------------------------------
+    - Identifier: ClusterMeshEtcdMvccKeys{{$suffix}}
+      Method: GenericPrometheusQuery
+      Params:
+        action: {{$action}}
+        metricName: ClusterMesh Etcd MVCC Keys {{$suffix}}
+        metricVersion: v1
+        unit: "#"
+        enableViolations: false
+        queries:
+        - name: Max
+          query: max(max_over_time(etcd_debugging_mvcc_keys_total[%v:]))
+        - name: Perc50
+          query: quantile(0.50, avg_over_time(etcd_debugging_mvcc_keys_total[%v:]))
diff --git a/modules/python/clusterloader2/clustermesh-scale/config/modules/scale-test-deployment.yaml b/modules/python/clusterloader2/clustermesh-scale/config/modules/scale-test-deployment.yaml
new file mode 100644
index 0000000000..9ceffc8595
--- /dev/null
+++ b/modules/python/clusterloader2/clustermesh-scale/config/modules/scale-test-deployment.yaml
@@ -0,0 +1,27 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{.Name}}
+  labels:
+    group: {{.Group}}
+spec:
+  replicas: {{.Replicas}}
+  selector:
+    matchLabels:
+      name: {{.Name}}
+  template:
+    metadata:
+      labels:
+        name: {{.Name}}
+        group: {{.Group}}
+    spec:
+      containers:
+        - name: pause
+          image: mcr.microsoft.com/oss/kubernetes/pause:3.6
+          resources:
+            requests:
+              cpu: 1m
+              memory: 5Mi
+            limits:
+              cpu: 5m
+              memory: 20Mi
diff --git a/modules/python/clusterloader2/clustermesh-scale/config/modules/scale-test.yaml b/modules/python/clusterloader2/clustermesh-scale/config/modules/scale-test.yaml
new file mode 100644
index 0000000000..5fd806c60b
--- /dev/null
+++ b/modules/python/clusterloader2/clustermesh-scale/config/modules/scale-test.yaml
@@ -0,0 +1,57 @@
+name: clustermesh-scale-test-module
+
+# Trivial pod deployment module: creates or deletes
+# namespaces x deploymentsPerNamespace x replicasPerDeployment
+# pause-image pods on the target cluster. No traffic, no churn, no policies.
+
+{{$actionName := .actionName}}
+{{$namespaces := .namespaces}}
+{{$deploymentsPerNamespace := .deploymentsPerNamespace}}
+{{$replicasPerDeployment := .replicasPerDeployment}}
+{{$tuningSet := .tuningSet}}
+{{$operationTimeout := .operationTimeout}}
+
+{{$totalDeployments := MultiplyInt $namespaces $deploymentsPerNamespace}} # NOTE(review): not referenced anywhere in this module
+
+steps:
+  # Register a fresh WaitForControlledPodsRunning watcher BEFORE the
+  # create/delete phase. Without this, the second invocation of this module
+  # (actionName=delete) errors with "metric WaitForControlledPodsRunning has
+  # not been started" — CL2 closes the metric after the first `gather`, so
+  # each invocation needs its own start. We use a per-action Identifier
+  # ("...-create" / "...-delete") so the start and gather pair cleanly even
+  # if the runtime ever caches metrics by Identifier across invocations.
+  - name: Start tracking pods to be {{$actionName}}d
+    measurements:
+      - Identifier: WaitForControlledPodsRunning-{{$actionName}}
+        Method: WaitForControlledPodsRunning
+        Params:
+          action: start
+          # CL2 needs apiVersion+kind to know which controllers to track on
+          # start; we deploy Deployment objects (see scale-test-deployment.yaml).
+          apiVersion: apps/v1
+          kind: Deployment
+          checkIfPodsAreUpdated: true
+          labelSelector: group = clustermesh-scale-test
+          operationTimeout: {{$operationTimeout}}
+
+  - name: {{$actionName}} deployments
+    phases:
+      - namespaceRange:
+          min: 1
+          max: {{$namespaces}}
+        replicasPerNamespace: {{$deploymentsPerNamespace}}
+        tuningSet: {{$tuningSet}}
+        objectBundle:
+          - basename: scale-test
+            objectTemplatePath: /modules/scale-test-deployment.yaml
+            templateFillMap:
+              Replicas: {{$replicasPerDeployment}}
+              Group: clustermesh-scale-test
+
+  - name: Wait for deployments to be {{$actionName}}d
+    measurements:
+      - Identifier: WaitForControlledPodsRunning-{{$actionName}}
+        Method: WaitForControlledPodsRunning
+        Params:
+          action: gather
diff --git a/modules/python/clusterloader2/clustermesh-scale/scale.py b/modules/python/clusterloader2/clustermesh-scale/scale.py
new file mode 100644
index 0000000000..35047f122a
--- /dev/null
+++ b/modules/python/clusterloader2/clustermesh-scale/scale.py
@@ -0,0 +1,258 @@
+"""
+ClusterMesh scale-test harness.
+
+Single-cluster invocation. The Telescope pipeline fans out by calling this
+script once per fleet member (driven by `az fleet clustermeshprofile list-members`
+in steps/topology/clustermesh-scale/execute-clusterloader2.yml). Each invocation
+emits one JSONL with a `cluster` attribution column so concatenated results from
+N clusters are queryable per-cluster downstream.
+
+Phase 1 is intentionally trivial: deploy a small fixed number of pods, no churn,
+no fortio, no network policies. The goal of Phase 1 is to prove the multi-cluster
+harness + topology + aggregation works end-to-end. Real measurements
+(cross-cluster event throughput, identity propagation, etc.) come in plan.md
+Phase 2 by adding measurement modules to config/modules/measurements/ and new
+parameters to configure/collect.
+"""
+import argparse
+import json
+import os
+from datetime import datetime, timezone
+
+from clusterloader2.utils import parse_xml_to_json, run_cl2_command, process_cl2_reports
+
+
+def configure_clusterloader2(
+    namespaces,
+    deployments_per_namespace,
+    replicas_per_deployment,
+    operation_timeout,
+    override_file,
+):
+    """Write the CL2 overrides file at ``override_file`` and echo it to stdout.
+
+    The four topology arguments are interpolated verbatim into the
+    CL2_NAMESPACES, CL2_DEPLOYMENTS_PER_NAMESPACE, CL2_REPLICAS_PER_DEPLOYMENT
+    and CL2_OPERATION_TIMEOUT keys; every other key has a fixed value. An
+    existing file at that path is overwritten.
+    """
+    fixed_overrides = (
+        # Prometheus stack: Cilium scraping stays ON so the measurement modules
+        # have data to query. The memory REQUEST is passed as a CLI flag by
+        # execute_clusterloader2 (its overrides key is not honored by this CL2
+        # image); the LIMIT below is honored and must be >= that request.
+        "CL2_PROMETHEUS_TOLERATE_MASTER: true\n",
+        "CL2_PROMETHEUS_MEMORY_LIMIT: 2Gi\n",
+        # Pin Prometheus to the dedicated `prompool` node (label prometheus=true,
+        # azure-2.tfvars) so it does not contend with workload pods for CPU/memory.
+        'CL2_PROMETHEUS_NODE_SELECTOR: "prometheus: \\"true\\""\n',
+        "CL2_PROMETHEUS_SCRAPE_CILIUM_AGENT: true\n",
+        "CL2_PROMETHEUS_SCRAPE_CILIUM_OPERATOR: true\n",
+        "CL2_POD_STARTUP_LATENCY_THRESHOLD: 3m\n",
+        # The API-responsiveness metric is still recorded; CL2 just no longer
+        # fails the test when its default SLO (perc99 <= 1s) is breached.
+        "CL2_ENABLE_VIOLATIONS_FOR_API_CALL_PROMETHEUS_SIMPLE: false\n",
+    )
+    topology_overrides = (
+        f"CL2_NAMESPACES: {namespaces}\n",
+        f"CL2_DEPLOYMENTS_PER_NAMESPACE: {deployments_per_namespace}\n",
+        f"CL2_REPLICAS_PER_DEPLOYMENT: {replicas_per_deployment}\n",
+        f"CL2_OPERATION_TIMEOUT: {operation_timeout}\n",
+    )
+    with open(override_file, "w", encoding="utf-8") as sink:
+        sink.writelines(fixed_overrides + topology_overrides)
+
+    with open(override_file, "r", encoding="utf-8") as source:
+        print(f"Content of file {override_file}:\n{source.read()}")
+
+
+def execute_clusterloader2(
+    cl2_image,
+    cl2_config_dir,
+    cl2_report_dir,
+    cl2_config_file,
+    kubeconfig,
+    provider,
+):
+    """Invoke run_cl2_command for one cluster with this harness's fixed options.
+
+    Overrides and Prometheus are enabled, Prometheus is left running afterwards
+    (tear_down_prometheus=False), and kubelet, KSM and metrics-server scraping
+    are switched on.
+    """
+    fixed_options = {
+        "overrides": True,
+        "enable_prometheus": True,
+        "tear_down_prometheus": False,
+        "scrape_kubelets": True,
+        "scrape_ksm": True,
+        "scrape_metrics_server": True,
+        # CL2 defaults the request to 10Gi, which does not fit a Standard_D4s_v4
+        # (16GB) node. A CLI flag is used because the CL2_PROMETHEUS_MEMORY_REQUEST
+        # overrides key is not honored by this image; CL2_PROMETHEUS_MEMORY_LIMIT
+        # in the overrides file keeps request <= limit.
+        "prometheus_memory_request": "1Gi",
+    }
+    positional = (kubeconfig, cl2_image, cl2_config_dir, cl2_report_dir, provider)
+    run_cl2_command(*positional, cl2_config_file=cl2_config_file, **fixed_options)
+
+
+def collect_clusterloader2(
+    cl2_report_dir,
+    cloud_info,
+    run_id,
+    run_url,
+    result_file,
+    test_type,
+    start_timestamp,
+    cluster_name,
+    cluster_count,
+    mesh_size,
+    namespaces,
+    deployments_per_namespace,
+    replicas_per_deployment,
+    trigger_reason="",
+):
+    """Turn one cluster's CL2 report directory into the result file.
+
+    Reads ``junit.xml`` under ``cl2_report_dir`` for the overall status, builds a
+    row template of run- and cluster-level fields, and writes the output of
+    ``process_cl2_reports`` to ``result_file``. Raises ``Exception`` when the
+    junit report contains no testsuites.
+    """
+    details = parse_xml_to_json(os.path.join(cl2_report_dir, "junit.xml"), indent=2)
+    testsuites = json.loads(details)["testsuites"]
+    if not testsuites:
+        raise Exception(f"No testsuites found in the report! Raw data: {details}")
+
+    status = "success" if testsuites[0]["failures"] == 0 else "failure"
+    testcases = testsuites[0].get("testcases")
+    template = {
+        "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
+        "status": status,
+        "group": None,
+        "measurement": None,
+        "result": None,
+        "test_details": {
+            "trigger_reason": trigger_reason,
+            # Cluster attribution: every row of this run is tagged with its
+            # source cluster so Kusto can group/filter across an N-cluster mesh.
+            "cluster": cluster_name,
+            # mesh_size is the configured target N (pipeline matrix) and
+            # cluster_count what was discovered at run time; querying
+            # `mesh_size != cluster_count` in Kusto surfaces partial-mesh runs.
+            "mesh_size": mesh_size,
+            "cluster_count": cluster_count,
+            "namespaces": namespaces,
+            "deployments_per_namespace": deployments_per_namespace,
+            "replicas_per_deployment": replicas_per_deployment,
+            "pods_per_cluster": namespaces * deployments_per_namespace * replicas_per_deployment,
+            "details": testcases[0].get("failure", None) if testcases else None,
+        },
+        "cloud_info": cloud_info,
+        "run_id": run_id,
+        "run_url": run_url,
+        "test_type": test_type,
+        "start_timestamp": start_timestamp,
+        # parameters (top-level for Kusto column convenience)
+        "cluster": cluster_name,
+        "mesh_size": mesh_size,
+        "cluster_count": cluster_count,
+        "namespaces": namespaces,
+        "deployments_per_namespace": deployments_per_namespace,
+        "replicas_per_deployment": replicas_per_deployment,
+    }
+    content = process_cl2_reports(cl2_report_dir, template)
+
+    # abspath() keeps dirname() non-empty when result_file is a bare filename;
+    # os.makedirs("") would raise FileNotFoundError in that case.
+    os.makedirs(os.path.dirname(os.path.abspath(result_file)), exist_ok=True)
+    with open(result_file, "w", encoding="utf-8") as f:
+        f.write(content)
+
+
+def main():
+    """Parse the CLI and run the chosen subcommand, or print help if none is given."""
+    parser = argparse.ArgumentParser(description="ClusterMesh scale-test harness.")
+    commands = parser.add_subparsers(dest="command")
+
+    # configure: topology knobs plus the destination of the overrides file
+    configure_cmd = commands.add_parser("configure", help="Write CL2 overrides file")
+    for flag in ("--namespaces", "--deployments-per-namespace", "--replicas-per-deployment"):
+        configure_cmd.add_argument(flag, type=int, required=True)
+    configure_cmd.add_argument("--operation-timeout", type=str, default="15m")
+    configure_cmd.add_argument("--cl2_override_file", type=str, required=True,
+                               help="Path to the overrides of CL2 config file")
+
+    # execute: every option is a required string
+    execute_cmd = commands.add_parser("execute", help="Run CL2 against a single cluster")
+    for flag in ("--cl2-image", "--cl2-config-dir", "--cl2-report-dir",
+                 "--cl2-config-file", "--kubeconfig", "--provider"):
+        execute_cmd.add_argument(flag, type=str, required=True)
+
+    # collect: run metadata, cluster attribution, then the topology knobs again
+    collect_cmd = commands.add_parser("collect", help="Collect results for one cluster")
+    collect_cmd.add_argument("--cl2_report_dir", type=str, required=True)
+    collect_cmd.add_argument("--cloud_info", type=str, default="")
+    collect_cmd.add_argument("--run_id", type=str, required=True)
+    collect_cmd.add_argument("--run_url", type=str, default="")
+    collect_cmd.add_argument("--result_file", type=str, required=True)
+    collect_cmd.add_argument("--test_type", type=str, default="default-config")
+    collect_cmd.add_argument("--start_timestamp", type=str, required=True)
+    collect_cmd.add_argument("--cluster-name", type=str, required=True,
+                             help="Fleet member / AKS cluster identity for attribution")
+    collect_cmd.add_argument("--cluster-count", type=int, required=True,
+                             help="Total clusters in the mesh for this run (N)")
+    collect_cmd.add_argument("--mesh-size", type=int, required=True,
+                             help="Configured target cluster count from the pipeline matrix; "
+                                  "compared against --cluster-count to detect partial-mesh runs")
+    for flag in ("--namespaces", "--deployments-per-namespace", "--replicas-per-deployment"):
+        collect_cmd.add_argument(flag, type=int, required=True)
+    collect_cmd.add_argument("--trigger_reason", type=str, default="")
+
+    args = parser.parse_args()
+    command = args.command
+
+    # Positional hand-off below follows each step function's parameter order.
+    if command == "configure":
+        configure_clusterloader2(
+            args.namespaces,
+            args.deployments_per_namespace,
+            args.replicas_per_deployment,
+            args.operation_timeout,
+            args.cl2_override_file,
+        )
+        return
+    if command == "execute":
+        execute_clusterloader2(
+            args.cl2_image,
+            args.cl2_config_dir,
+            args.cl2_report_dir,
+            args.cl2_config_file,
+            args.kubeconfig,
+            args.provider,
+        )
+        return
+    if command == "collect":
+        collect_clusterloader2(
+            args.cl2_report_dir,
+            args.cloud_info,
+            args.run_id,
+            args.run_url,
+            args.result_file,
+            args.test_type,
+            args.start_timestamp,
+            args.cluster_name,
+            args.cluster_count,
+            args.mesh_size,
+            args.namespaces,
+            args.deployments_per_namespace,
+            args.replicas_per_deployment,
+            args.trigger_reason,
+        )
+        return
+    parser.print_help()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/modules/python/clusterloader2/utils.py b/modules/python/clusterloader2/utils.py
index 50deb2ed85..f0cec83046 100644
--- a/modules/python/clusterloader2/utils.py
+++ b/modules/python/clusterloader2/utils.py
@@ -25,7 +25,8 @@
 
 def run_cl2_command(kubeconfig, cl2_image, cl2_config_dir, cl2_report_dir, provider, cl2_config_file="config.yaml", overrides=False, enable_prometheus=False, tear_down_prometheus=True,
                     enable_exec_service=False, scrape_kubelets=False,
-                    scrape_containerd=False, scrape_ksm=False, scrape_metrics_server=False):
+                    scrape_containerd=False, scrape_ksm=False, scrape_metrics_server=False,
+                    prometheus_memory_request=None):
     docker_client = DockerClient()
 
     command = f"""--provider={provider} --v=2
@@ -42,6 +43,14 @@ def run_cl2_command(kubeconfig, cl2_image, cl2_config_dir, cl2_report_dir, provi
     if scrape_containerd:
         command += f" --prometheus-scrape-containerd={scrape_containerd}"
 
+    if prometheus_memory_request:
+        # CL2 default is 10Gi. Smaller-than-default node SKUs (e.g. AKS
+        # Standard_D4s_v4 with 16GB) can't schedule the pod with the default
+        # request, and the resource-quota / limit ratio in the bundled
+        # prometheus manifests is rejected by k8s admission. Optional
+        # parameter — None preserves CL2 default for existing callers.
+        command += f" --prometheus-memory-request={prometheus_memory_request}"
+
     if overrides:
         command += " --testoverrides=/root/perf-tests/clusterloader2/config/overrides.yaml"
 
diff --git a/modules/python/tests/mock_data/clustermesh-scale/report/mesh-1/GenericPrometheusQuery_PodStartupLatency_clustermesh-scale-test_2026-04-28T15:00:00Z.json b/modules/python/tests/mock_data/clustermesh-scale/report/mesh-1/GenericPrometheusQuery_PodStartupLatency_clustermesh-scale-test_2026-04-28T15:00:00Z.json
new file mode 100644
index 0000000000..3100934955
--- /dev/null
+++ b/modules/python/tests/mock_data/clustermesh-scale/report/mesh-1/GenericPrometheusQuery_PodStartupLatency_clustermesh-scale-test_2026-04-28T15:00:00Z.json
@@ -0,0 +1,29 @@
+{
+  "version": "v1",
+  "dataItems": [
+    {
+      "labels": {
+        "Metric": "Perc99"
+      },
+      "data": {
+        "value": 1.2
+      }
+    },
+    {
+      "labels": {
+        "Metric": "Perc90"
+      },
+      "data": {
+        "value": 0.8
+      }
+    },
+    {
+      "labels": {
+        "Metric": "Perc50"
+      },
+      "data": {
+        "value": 0.4
+      }
+    }
+  ]
+}
diff --git a/modules/python/tests/mock_data/clustermesh-scale/report/mesh-1/junit.xml b/modules/python/tests/mock_data/clustermesh-scale/report/mesh-1/junit.xml
new file mode 100644
index 0000000000..34a14e3425
--- /dev/null
+++ b/modules/python/tests/mock_data/clustermesh-scale/report/mesh-1/junit.xml
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<testsuite name="ClusterLoaderV2" tests="6" failures="0" errors="0" time="180.000">
+    <testcase name="clustermesh-scale-test overall" classname="ClusterLoaderV2" time="179.5"></testcase>
+    <testcase name="clustermesh-scale-test: [step: 01] Start measurements" classname="ClusterLoaderV2" time="0.1"></testcase>
+    <testcase name="clustermesh-scale-test: [step: 02] create ClusterMesh Pod Monitor" classname="ClusterLoaderV2" time="2.0"></testcase>
+    <testcase name="clustermesh-scale-test: [step: 03] create scale-test deployments" classname="ClusterLoaderV2" time="60.0"></testcase>
+    <testcase name="clustermesh-scale-test: [step: 04] Gather measurements" classname="ClusterLoaderV2" time="60.0"></testcase>
+    <testcase name="clustermesh-scale-test: [step: 05] delete scale-test deployments" classname="ClusterLoaderV2" time="57.4"></testcase>
+</testsuite>
diff --git a/modules/python/tests/mock_data/clustermesh-scale/report/mesh-2/GenericPrometheusQuery_PodStartupLatency_clustermesh-scale-test_2026-04-28T15:00:30Z.json b/modules/python/tests/mock_data/clustermesh-scale/report/mesh-2/GenericPrometheusQuery_PodStartupLatency_clustermesh-scale-test_2026-04-28T15:00:30Z.json
new file mode 100644
index 0000000000..dbfb9aacc8
--- /dev/null
+++ b/modules/python/tests/mock_data/clustermesh-scale/report/mesh-2/GenericPrometheusQuery_PodStartupLatency_clustermesh-scale-test_2026-04-28T15:00:30Z.json
@@ -0,0 +1,29 @@
+{
+  "version": "v1",
+  "dataItems": [
+    {
+      "labels": {
+        "Metric": "Perc99"
+      },
+      "data": {
+        "value": 1.5
+      }
+    },
+    {
+      "labels": {
+        "Metric": "Perc90"
+      },
+      "data": {
+        "value": 1.0
+      }
+    },
+    {
+      "labels": {
+        "Metric": "Perc50"
+      },
+      "data": {
+        "value": 0.5
+      }
+    }
+  ]
+}
diff --git a/modules/python/tests/mock_data/clustermesh-scale/report/mesh-2/junit.xml b/modules/python/tests/mock_data/clustermesh-scale/report/mesh-2/junit.xml
new file mode 100644
index 0000000000..ee983d20bc
--- /dev/null
+++ b/modules/python/tests/mock_data/clustermesh-scale/report/mesh-2/junit.xml
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<testsuite name="ClusterLoaderV2" tests="6" failures="0" errors="0" time="195.000">
+    <testcase name="clustermesh-scale-test overall" classname="ClusterLoaderV2" time="194.5"></testcase>
+    <testcase name="clustermesh-scale-test: [step: 01] Start measurements" classname="ClusterLoaderV2" time="0.1"></testcase>
+    <testcase name="clustermesh-scale-test: [step: 02] create ClusterMesh Pod Monitor" classname="ClusterLoaderV2" time="2.5"></testcase>
+    <testcase name="clustermesh-scale-test: [step: 03] create scale-test deployments" classname="ClusterLoaderV2" time="65.0"></testcase>
+    <testcase name="clustermesh-scale-test: [step: 04] Gather measurements" classname="ClusterLoaderV2" time="65.0"></testcase>
+    <testcase name="clustermesh-scale-test: [step: 05] delete scale-test deployments" classname="ClusterLoaderV2" time="61.9"></testcase>
+</testsuite>
diff --git a/modules/python/tests/mock_data/clustermesh-scale/report/mesh-fail/GenericPrometheusQuery_PodStartupLatency_clustermesh-scale-test_2026-04-28T15:01:00Z.json b/modules/python/tests/mock_data/clustermesh-scale/report/mesh-fail/GenericPrometheusQuery_PodStartupLatency_clustermesh-scale-test_2026-04-28T15:01:00Z.json
new file mode 100644
index 0000000000..868c276002
--- /dev/null
+++ b/modules/python/tests/mock_data/clustermesh-scale/report/mesh-fail/GenericPrometheusQuery_PodStartupLatency_clustermesh-scale-test_2026-04-28T15:01:00Z.json
@@ -0,0 +1,13 @@
+{
+  "version": "v1",
+  "dataItems": [
+    {
+      "labels": {
+        "Metric": "Perc99"
+      },
+      "data": {
+        "value": 99.9
+      }
+    }
+  ]
+}
diff --git a/modules/python/tests/mock_data/clustermesh-scale/report/mesh-fail/junit.xml b/modules/python/tests/mock_data/clustermesh-scale/report/mesh-fail/junit.xml
new file mode 100644
index 0000000000..a9eb1b2c7f
--- /dev/null
+++ b/modules/python/tests/mock_data/clustermesh-scale/report/mesh-fail/junit.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<testsuite name="ClusterLoaderV2" tests="3" failures="1" errors="0" time="42.000">
+    <testcase name="clustermesh-scale-test overall" classname="ClusterLoaderV2" time="41.5">
+        <failure type="ClusterLoaderV2 failure">timeout waiting for deployments to become ready in cluster mesh-fail</failure>
+    </testcase>
+    <testcase name="clustermesh-scale-test: [step: 01] Start measurements" classname="ClusterLoaderV2" time="0.1"></testcase>
+    <testcase name="clustermesh-scale-test: [step: 02] create ClusterMesh Pod Monitor" classname="ClusterLoaderV2" time="0.4"></testcase>
+</testsuite>
diff --git a/modules/python/tests/test_clustermesh_scale.py b/modules/python/tests/test_clustermesh_scale.py
new file mode 100644
index 0000000000..0b9dd7510e
--- /dev/null
+++ b/modules/python/tests/test_clustermesh_scale.py
@@ -0,0 +1,410 @@
+"""Unit tests for the clustermesh-scale CL2 harness.
+
+Target module: modules/python/clusterloader2/clustermesh-scale/scale.py.
+Mirrors tests/test_network_scale.py — the module is loaded via importlib because
+the ``clustermesh-scale`` directory contains a hyphen and is not a valid Python
+package name.
+
+The key invariant under test is multi-cluster attribution: when collect_clusterloader2
+is called once per cluster (as the pipeline's collect.yml does), the resulting JSONL
+rows must each carry distinct cluster identity while sharing run-level fields. Without
+this, downstream Kusto queries cannot group/filter by cluster across the mesh.
+"""
+import importlib.util
+import json
+import os
+import sys
+import tempfile
+import unittest
+from pathlib import Path
+from unittest.mock import patch
+
+MODULE_PATH = (
+    Path(__file__).resolve().parents[1]  # parent of the directory holding this test file
+    / "clusterloader2"
+    / "clustermesh-scale"
+    / "scale.py"
+)
+MODULE_SPEC = importlib.util.spec_from_file_location(
+    "clusterloader2_clustermesh_scale", MODULE_PATH  # importable alias; the real directory name has a hyphen
+)
+if MODULE_SPEC is None or MODULE_SPEC.loader is None:
+    raise ImportError(f"Unable to load module from {MODULE_PATH}")
+clustermesh_scale_module = importlib.util.module_from_spec(MODULE_SPEC)
+MODULE_SPEC.loader.exec_module(clustermesh_scale_module)  # executes scale.py's module-level code
+
+configure_clusterloader2 = clustermesh_scale_module.configure_clusterloader2  # aliases used by the tests below
+collect_clusterloader2 = clustermesh_scale_module.collect_clusterloader2
+main = clustermesh_scale_module.main
+
+MOCK_REPORT_ROOT = os.path.join(
+    os.path.dirname(__file__), "mock_data", "clustermesh-scale", "report"
+)  # holds one sub-directory per mock report: mesh-1, mesh-2, mesh-fail
+
+
+class TestConfigureClustermeshScale(unittest.TestCase):
+    """The overrides file written by configure_clusterloader2 carries the expected keys."""
+
+    def _write_overrides(self, **knobs):
+        """Run configure_clusterloader2 against a scratch file and return its text."""
+        with tempfile.NamedTemporaryFile(
+            delete=False, mode="w+", encoding="utf-8"
+        ) as scratch:
+            scratch_path = scratch.name
+        try:
+            configure_clusterloader2(override_file=scratch_path, **knobs)
+            with open(scratch_path, "r", encoding="utf-8") as written:
+                return written.read()
+        finally:
+            os.remove(scratch_path)
+
+    def test_overrides_file_contents(self):
+        """Every CL2_* knob the config template reads must appear in the overrides file."""
+        content = self._write_overrides(
+            namespaces=2,
+            deployments_per_namespace=3,
+            replicas_per_deployment=4,
+            operation_timeout="20m",
+        )
+        # Prometheus knobs: Cilium agent/operator scraping feeds the measurement
+        # modules, the memory LIMIT is honored via overrides, and NODE_SELECTOR
+        # pins Prometheus to the `prompool` node (azure-2.tfvars, prometheus=true).
+        expected_fixed = (
+            "CL2_PROMETHEUS_TOLERATE_MASTER: true",
+            "CL2_PROMETHEUS_MEMORY_LIMIT: 2Gi",
+            'CL2_PROMETHEUS_NODE_SELECTOR: "prometheus: \\"true\\""',
+            "CL2_PROMETHEUS_SCRAPE_CILIUM_AGENT: true",
+            "CL2_PROMETHEUS_SCRAPE_CILIUM_OPERATOR: true",
+            "CL2_POD_STARTUP_LATENCY_THRESHOLD: 3m",
+            "CL2_ENABLE_VIOLATIONS_FOR_API_CALL_PROMETHEUS_SIMPLE: false",
+        )
+        for line in expected_fixed:
+            self.assertIn(line, content)
+
+        # The memory REQUEST goes through the --prometheus-memory-request CLI flag
+        # (CL2_PROMETHEUS_MEMORY_REQUEST is not a real overrides key for this image).
+        absent_keys = (
+            "CL2_PROMETHEUS_MEMORY_REQUEST",
+            "CL2_PROMETHEUS_MEMORY_LIMIT_FACTOR",
+            "CL2_PROMETHEUS_MEMORY_SCALE_FACTOR",
+            "CL2_PROMETHEUS_CPU_SCALE_FACTOR",
+        )
+        for key in absent_keys:
+            self.assertNotIn(key, content)
+
+        # Topology knobs round-tripped from arguments.
+        expected_topology = (
+            "CL2_NAMESPACES: 2",
+            "CL2_DEPLOYMENTS_PER_NAMESPACE: 3",
+            "CL2_REPLICAS_PER_DEPLOYMENT: 4",
+            "CL2_OPERATION_TIMEOUT: 20m",
+        )
+        for line in expected_topology:
+            self.assertIn(line, content)
+
+    def test_overrides_file_timeout_passthrough(self):
+        """Caller-provided operation_timeout flows through unchanged (no clamping)."""
+        content = self._write_overrides(
+            namespaces=1, deployments_per_namespace=1,
+            replicas_per_deployment=1, operation_timeout="45m",
+        )
+        self.assertIn("CL2_OPERATION_TIMEOUT: 45m", content)
+
+
+class TestCollectSingleCluster(unittest.TestCase):
+    """collect_clusterloader2 emits one JSONL row per call, tagged with cluster identity."""
+
+    def _collect(self, *, cluster_name, cluster_count=2, mesh_size=2,
+                 test_type="unit-test", report_subdir="mesh-1"):
+        """Collect a mock report into a private scratch dir and return the JSONL path."""
+        # Replaces the deprecated, race-prone tempfile.mktemp(); removed at teardown.
+        scratch = tempfile.TemporaryDirectory()
+        self.addCleanup(scratch.cleanup)
+        result_file = os.path.join(scratch.name, "result.jsonl")
+        collect_clusterloader2(
+            cl2_report_dir=os.path.join(MOCK_REPORT_ROOT, report_subdir),
+            cloud_info=json.dumps({"cloud": "azure", "region": "eastus2"}),
+            run_id="test-run-123",
+            run_url="http://example.com/run123",
+            result_file=result_file,
+            test_type=test_type,
+            start_timestamp="2026-04-28T15:00:00Z",
+            cluster_name=cluster_name,
+            cluster_count=cluster_count,
+            mesh_size=mesh_size,
+            namespaces=2,
+            deployments_per_namespace=3,
+            replicas_per_deployment=4,
+            trigger_reason="Manual",
+        )
+        return result_file
+
+    def test_collect_creates_result_file(self):
+        """collect_clusterloader2 writes a non-empty JSONL with run-level fields."""
+        result_file = self._collect(cluster_name="mesh-1")
+        try:
+            self.assertTrue(os.path.exists(result_file))
+            with open(result_file, "r", encoding="utf-8") as f:
+                content = f.read()
+            self.assertGreater(len(content), 0)
+            lines = content.strip().split("\n")
+            self.assertGreaterEqual(len(lines), 1)
+            row = json.loads(lines[0])
+            self.assertEqual(row["status"], "success")
+            self.assertEqual(row["run_id"], "test-run-123")
+            self.assertEqual(row["test_type"], "unit-test")
+            self.assertEqual(row["start_timestamp"], "2026-04-28T15:00:00Z")
+        finally:
+            if os.path.exists(result_file):
+                os.remove(result_file)
+
+    def test_collect_attributes_cluster_identity(self):
+        """Cluster identity is propagated to BOTH top-level and test_details, per Kusto schema."""
+        result_file = self._collect(cluster_name="mesh-1", cluster_count=2)
+        try:
+            with open(result_file, "r", encoding="utf-8") as f:
+                row = json.loads(f.read().strip().split("\n")[0])
+            self.assertEqual(row["cluster"], "mesh-1")
+            self.assertEqual(row["cluster_count"], 2)
+            self.assertEqual(row["test_details"]["cluster"], "mesh-1")
+            self.assertEqual(row["test_details"]["cluster_count"], 2)
+        finally:
+            if os.path.exists(result_file):
+                os.remove(result_file)
+
+    def test_collect_computes_pods_per_cluster(self):
+        """pods_per_cluster = namespaces * deployments * replicas (2 * 3 * 4 = 24)."""
+        result_file = self._collect(cluster_name="mesh-1")
+        try:
+            with open(result_file, "r", encoding="utf-8") as f:
+                row = json.loads(f.read().strip().split("\n")[0])
+            self.assertEqual(row["test_details"]["pods_per_cluster"], 24)
+            self.assertEqual(row["namespaces"], 2)
+            self.assertEqual(row["deployments_per_namespace"], 3)
+            self.assertEqual(row["replicas_per_deployment"], 4)
+        finally:
+            if os.path.exists(result_file):
+                os.remove(result_file)
+
+    def test_collect_emits_mesh_size_independent_of_cluster_count(self):
+        """mesh_size (configured target) and cluster_count (observed) must be distinct fields.
+
+        Kusto surfaces partial-mesh runs via ``mesh_size != cluster_count`` (a Fleet member
+        that failed to join lowers cluster_count); both must be top-level AND in test_details.
+        """
+        result_file = self._collect(cluster_name="mesh-1", cluster_count=4, mesh_size=5)
+        try:
+            with open(result_file, "r", encoding="utf-8") as f:
+                row = json.loads(f.read().strip().split("\n")[0])
+            self.assertEqual(row["mesh_size"], 5)
+            self.assertEqual(row["cluster_count"], 4)
+            self.assertEqual(row["test_details"]["mesh_size"], 5)
+            self.assertEqual(row["test_details"]["cluster_count"], 4)
+            self.assertNotEqual(row["mesh_size"], row["cluster_count"])
+        finally:
+            if os.path.exists(result_file):
+                os.remove(result_file)
+
+    def test_collect_propagates_test_type(self):
+        """test_type tags every JSONL row so Kusto can filter scenario flavors.
+
+        Scale-scenario #1 (event-throughput) and the default-config Phase-1 smoke run
+        share one results table; dashboards filter on ``test_type == 'event-throughput'``
+        to scope the scaling-curve view. Guards that the field flows through unmodified.
+        """
+        result_file = self._collect(cluster_name="mesh-1", test_type="event-throughput")
+        try:
+            with open(result_file, "r", encoding="utf-8") as f:
+                row = json.loads(f.read().strip().split("\n")[0])
+            self.assertEqual(row["test_type"], "event-throughput")
+        finally:
+            if os.path.exists(result_file):
+                os.remove(result_file)
+
+
+class TestCollectMultiCluster(unittest.TestCase):
+    """The multi-cluster aggregation invariant — the reason this scenario exists.
+
+    collect.yml calls scale.py once per cluster and concatenates per-cluster JSONL
+    files into a single TEST_RESULTS_FILE. The resulting stream MUST have:
+      * one logical row per cluster, each with a distinct `cluster` field
+      * `cluster_count` and `run_id` consistent across rows (same pipeline run)
+    Without this, downstream Kusto cannot group/filter by cluster.
+    """
+
+    def _collect(self, *, cluster_name, report_subdir):
+        """Collect one cluster's mock report into a private scratch dir; return the path."""
+        # Replaces the deprecated, race-prone tempfile.mktemp(); removed at teardown.
+        scratch = tempfile.TemporaryDirectory()
+        self.addCleanup(scratch.cleanup)
+        result_file = os.path.join(scratch.name, f"{cluster_name}.jsonl")
+        collect_clusterloader2(
+            cl2_report_dir=os.path.join(MOCK_REPORT_ROOT, report_subdir),
+            cloud_info=json.dumps({"cloud": "azure"}),
+            run_id="multi-cluster-run",
+            run_url="http://example.com/multi",
+            result_file=result_file,
+            test_type="unit-test",
+            start_timestamp="2026-04-28T15:00:00Z",
+            cluster_name=cluster_name,
+            cluster_count=2,
+            mesh_size=2,
+            namespaces=1,
+            deployments_per_namespace=1,
+            replicas_per_deployment=1,
+            trigger_reason="",
+        )
+        return result_file
+
+    def test_two_clusters_aggregate_with_distinct_attribution(self):
+        """Aggregating per-cluster JSONLs yields rows with distinct cluster identity."""
+        f1 = self._collect(cluster_name="mesh-1", report_subdir="mesh-1")
+        f2 = self._collect(cluster_name="mesh-2", report_subdir="mesh-2")
+        try:
+            # Mirror what collect.yml does: cat per-cluster files into one stream.
+            aggregated = ""
+            for path in (f1, f2):
+                with open(path, "r", encoding="utf-8") as f:
+                    aggregated += f.read()
+
+            rows = [json.loads(line) for line in aggregated.strip().split("\n") if line]
+            # Each per-cluster collect emits at least one row (overall testsuite line).
+            self.assertGreaterEqual(len(rows), 2)
+
+            clusters_seen = {row["cluster"] for row in rows}
+            self.assertEqual(clusters_seen, {"mesh-1", "mesh-2"})
+
+            # Run-level fields (run_id, cluster_count, mesh_size) must match across all rows.
+            run_ids = {row["run_id"] for row in rows}
+            cluster_counts = {row["cluster_count"] for row in rows}
+            mesh_sizes = {row["mesh_size"] for row in rows}
+            self.assertEqual(run_ids, {"multi-cluster-run"})
+            self.assertEqual(cluster_counts, {2})
+            self.assertEqual(mesh_sizes, {2})
+        finally:
+            for path in (f1, f2):
+                if os.path.exists(path):
+                    os.remove(path)
+
+
+class TestCollectFailureStatus(unittest.TestCase):
+    """A junit.xml with failures>0 must produce status=failure (no silent green)."""
+
+    def test_failure_in_junit_propagates_to_status(self):
+        """A junit testsuite with failures>0 must surface as status=failure in the JSONL."""
+        result_file = tempfile.mktemp(suffix=".jsonl")
+        try:
+            collect_clusterloader2(
+                cl2_report_dir=os.path.join(MOCK_REPORT_ROOT, "mesh-fail"),
+                cloud_info="",
+                run_id="fail-run",
+                run_url="",
+                result_file=result_file,
+                test_type="unit-test",
+                start_timestamp="2026-04-28T15:00:00Z",
+                cluster_name="mesh-fail",
+                cluster_count=2,
+                mesh_size=2,
+                namespaces=1,
+                deployments_per_namespace=1,
+                replicas_per_deployment=1,
+                trigger_reason="",
+            )
+            with open(result_file, "r", encoding="utf-8") as f:
+                row = json.loads(f.read().strip().split("\n")[0])
+            self.assertEqual(row["status"], "failure")
+            self.assertEqual(row["cluster"], "mesh-fail")
+            details = row["test_details"]["details"]
+            self.assertIsNotNone(details)
+            self.assertIn("timeout", json.dumps(details).lower())
+        finally:
+            if os.path.exists(result_file):
+                os.remove(result_file)
+
+
+class TestMainArgumentParsing(unittest.TestCase):
+    """main() dispatches subcommands to the right function with the right args."""
+
+    @patch.object(clustermesh_scale_module, "configure_clusterloader2")
+    def test_configure_command_parsing(self, mock_configure):
+        """`configure` subcommand wires CLI args through to configure_clusterloader2."""
+        test_args = [
+            "clustermesh-scale/scale.py",
+            "configure",
+            "--namespaces", "2",
+            "--deployments-per-namespace", "3",
+            "--replicas-per-deployment", "4",
+            "--operation-timeout", "20m",
+            "--cl2_override_file", "/tmp/overrides.yaml",
+        ]
+        with patch.object(sys, "argv", test_args):
+            main()
+        mock_configure.assert_called_once_with(2, 3, 4, "20m", "/tmp/overrides.yaml")
+
+    @patch.object(clustermesh_scale_module, "execute_clusterloader2")
+    def test_execute_command_parsing(self, mock_execute):
+        """`execute` subcommand wires CLI args through to execute_clusterloader2."""
+        test_args = [
+            "clustermesh-scale/scale.py",
+            "execute",
+            "--cl2-image", "ghcr.io/azure/clusterloader2:v20250513",
+            "--cl2-config-dir", "/path/to/config",
+            "--cl2-report-dir", "/path/to/report",
+            "--cl2-config-file", "config.yaml",
+            "--kubeconfig", "/path/to/kubeconfig",
+            "--provider", "aks",
+        ]
+        with patch.object(sys, "argv", test_args):
+            main()
+        mock_execute.assert_called_once_with(
+            "ghcr.io/azure/clusterloader2:v20250513",
+            "/path/to/config",
+            "/path/to/report",
+            "config.yaml",
+            "/path/to/kubeconfig",
+            "aks",
+        )
+
+    @patch.object(clustermesh_scale_module, "collect_clusterloader2")
+    def test_collect_command_parsing(self, mock_collect):
+        """`collect` subcommand wires CLI args through to collect_clusterloader2."""
+        test_args = [
+            "clustermesh-scale/scale.py",
+            "collect",
+            "--cl2_report_dir", "/path/to/report",
+            "--cloud_info", "{}",
+            "--run_id", "abc",
+            "--run_url", "http://example.com",
+            "--result_file", "/tmp/results.jsonl",
+            "--test_type", "default-config",
+            "--start_timestamp", "2026-04-28T15:00:00Z",
+            "--cluster-name", "mesh-1",
+            "--cluster-count", "2",
+            "--mesh-size", "2",
+            "--namespaces", "1",
+            "--deployments-per-namespace", "1",
+            "--replicas-per-deployment", "1",
+            "--trigger_reason", "Manual",
+        ]
+        with patch.object(sys, "argv", test_args):
+            main()
+        mock_collect.assert_called_once_with(
+            "/path/to/report",
+            "{}",
+            "abc",
+            "http://example.com",
+            "/tmp/results.jsonl",
+            "default-config",
+            "2026-04-28T15:00:00Z",
+            "mesh-1",
+            2,
+            2,
+            1,
+            1,
+            1,
+            "Manual",
+        )
+
+
+# Run the tests in this module when the file is executed directly.
+if __name__ == "__main__":
+    unittest.main()
diff --git a/modules/terraform/azure/aks-cli/main.tf b/modules/terraform/azure/aks-cli/main.tf
index 47395fcab6..687ca04e5b 100644
--- a/modules/terraform/azure/aks-cli/main.tf
+++ b/modules/terraform/azure/aks-cli/main.tf
@@ -53,6 +53,12 @@ locals {
     try(var.subnets_map[var.aks_cli_config.subnet_name], null)
   )
 
+  # Resolve the optional pod subnet name to a subnet ID through subnets_map.
+  # The result is null when pod_subnet_name is unset, and also when the name
+  # has no entry in subnets_map (the inner try() swallows the lookup error).
+  pod_subnet_id = (
+    try(var.aks_cli_config.pod_subnet_name, null) == null ?
+    null :
+    try(var.subnets_map[var.aks_cli_config.pod_subnet_name], null)
+  )
+
   api_server_subnet_id = (
     var.aks_cli_config.api_server_subnet_name == null ?
     null :
@@ -118,6 +124,14 @@ locals {
     )
   )
 
+  # CLI fragment "--pod-subnet-id <id>", or an empty string when no pod subnet
+  # ID was resolved.
+  pod_subnet_id_parameter = (local.pod_subnet_id == null ?
+    "" :
+    format(
+      "%s %s",
+      "--pod-subnet-id", local.pod_subnet_id,
+    )
+  )
+
   managed_identity_parameter = (var.aks_cli_config.managed_identity_name == null ?
     "--enable-managed-identity" :
     format(
@@ -193,6 +207,7 @@ locals {
     local.kms_parameters,
     local.disk_encryption_parameters,
     local.subnet_id_parameter,
+    local.pod_subnet_id_parameter,
     local.managed_identity_parameter,
     local.kubelet_identity_parameter,
     local.api_server_vnet_integration_parameter,
diff --git a/modules/terraform/azure/fleet/main.tf b/modules/terraform/azure/fleet/main.tf
new file mode 100644
index 0000000000..559050996e
--- /dev/null
+++ b/modules/terraform/azure/fleet/main.tf
@@ -0,0 +1,336 @@
+# =============================================================================
+# Fleet + ClusterMesh Profile submodule
+#
+# Mirrors Steps 4-6 of fleet-setup-script.sh:
+#   Step 4: az fleet create
+#   Step 5: az fleet member create --labels mesh=true  (per cluster)
+#   Step 6: az fleet clustermeshprofile create --selector mesh=true
+#           az fleet clustermeshprofile apply
+#
+# Design decisions:
+# - Fleet resource: azapi_resource. There is no stable azurerm resource that
+#   covers managed Fleet with the shape we need, and the clustermeshprofile
+#   lives under the same ARM parent, so keeping Fleet in azapi keeps the
+#   parent_id references simple.
+# - Fleet members: terraform_data + local-exec wrapping
+#   `az fleet member create --labels`. Member labels (needed by the
+#   clustermeshprofile selector) are first-class in the Fleet ARM API but
+#   the azapi resource body shape is currently rejected for this field;
+#   az CLI is the supported surface today.
+# - ClusterMeshProfile create/apply: terraform_data + local-exec, wrapping
+#   `az fleet clustermeshprofile create` and `apply`. The ARM resource type
+#   is still private-preview — az CLI (v2.0.4+ private .whl) is currently
+#   the only path. Create and destroy commands are stored inside
+#   terraform_data.input so the destroy-time provisioner can reference
+#   self.input.<cmd> (destroy-time provisioners can't read vars/locals).
+#   Same pattern as modules/terraform/azure/aks-cli/main.tf:271-318.
+# =============================================================================
+
+locals {
+  # Master switch: every resource in this module is gated on this flag.
+  fleet_enabled = var.fleet_enabled
+
+  # Members indexed by their Fleet-side name.
+  members_by_name = { for member in var.members : member.member_name => member }
+
+  # AKS ARM resource IDs, assembled from known inputs because aks-cli does not
+  # emit outputs. The depends_on chain on the fleet module instance ensures AKS
+  # exists before these IDs are referenced by the member create call.
+  aks_resource_id = {
+    for member in var.members :
+    member.member_name => "/subscriptions/${var.subscription_id}/resourceGroups/${var.resource_group_name}/providers/Microsoft.ContainerService/managedClusters/${member.aks_name}"
+  }
+}
+
+# -----------------------------------------------------------------------------
+# Step 4: Fleet resource
+# -----------------------------------------------------------------------------
+resource "azapi_resource" "fleet" {
+  # Created only when the module is enabled.
+  count = local.fleet_enabled ? 1 : 0
+
+  # ARM resource type and API version used for the Fleet.
+  type      = "Microsoft.ContainerService/fleets@2025-03-01"
+  name      = var.fleet_name
+  # The Fleet is placed in the resource group given by var.resource_group_name.
+  parent_id = "/subscriptions/${var.subscription_id}/resourceGroups/${var.resource_group_name}"
+  location  = var.location
+  tags      = var.tags
+
+  # No Fleet properties are set; the request body carries an empty object.
+  body = {
+    properties = {}
+  }
+}
+
+# -----------------------------------------------------------------------------
+# Step 5: Fleet members (one per AKS cluster), labeled for the mesh selector.
+#
+# Implemented via local-exec for two reasons:
+# 1. Mirrors the source script exactly (`az fleet member create --labels mesh=true`).
+# 2. The Fleet member ARM API rejects azapi-style bodies for the `labels` field;
+#    az CLI is the supported surface for this resource shape today.
+#
+# Same pattern as the clustermeshprofile below: command stored in
+# terraform_data.input so destroy-time provisioner can reference self.input.*.
+# -----------------------------------------------------------------------------
+locals {
+  # Per-member `az fleet member create` command, keyed by member_name. The
+  # label is what the clustermeshprofile selector matches on.
+  member_create_command = {
+    for m in var.members : m.member_name => join(" ", [
+      "az fleet member create",
+      "--subscription", var.subscription_id,
+      "--resource-group", var.resource_group_name,
+      "--fleet-name", var.fleet_name,
+      "--name", m.member_name,
+      "--member-cluster-id", local.aks_resource_id[m.member_name],
+      "--labels", "${var.member_label_key}=${var.member_label_value}",
+      "--output", "none",
+    ])
+  }
+
+  # Per-member `az fleet member delete` command, keyed by member_name.
+  member_destroy_command = {
+    for m in var.members : m.member_name => join(" ", [
+      "az fleet member delete",
+      "--subscription", var.subscription_id,
+      "--resource-group", var.resource_group_name,
+      "--fleet-name", var.fleet_name,
+      "--name", m.member_name,
+      "--yes",
+      "--output", "none",
+    ])
+  }
+
+  # Label value written to members during teardown. It must differ from
+  # var.member_label_value, otherwise the relabel would leave the selector
+  # matching and the profile's member set could never drain. `detaching` is
+  # used unless the configured value is itself `detaching`, in which case
+  # `detached` is used instead.
+  member_detach_label_value = var.member_label_value == "detaching" ? "detached" : "detaching"
+
+  # Re-label members during destroy so the clustermeshprofile's
+  # `${member_label_key}=${member_label_value}` selector no longer matches —
+  # this is the only way out of the Fleet API's chicken-and-egg between
+  # `member delete` (rejects with MemberBelongsToClusterMesh while attached)
+  # and `clustermeshprofile delete` (rejects with
+  # CannotDeleteClusterMeshProfileWithMembers while members exist).
+  # `az fleet member update --labels` REPLACES the labels map (it's not
+  # additive), so this also drops the original selector label.
+  member_relabel_command = {
+    for m in var.members : m.member_name => join(" ", [
+      "az fleet member update",
+      "--subscription", var.subscription_id,
+      "--resource-group", var.resource_group_name,
+      "--fleet-name", var.fleet_name,
+      "--name", m.member_name,
+      "--labels", "${var.member_label_key}=${local.member_detach_label_value}",
+      "--output", "none",
+    ])
+  }
+}
+
+resource "terraform_data" "member" {
+  # One instance per member, keyed by member_name; none when the module is
+  # disabled.
+  for_each = local.fleet_enabled ? local.members_by_name : {}
+
+  # The Fleet must exist before members can be created under it.
+  depends_on = [azapi_resource.fleet]
+
+  # Both commands are captured in input so the provisioners below read them
+  # through self.input.
+  input = {
+    create_command  = local.member_create_command[each.value.member_name]
+    destroy_command = local.member_destroy_command[each.value.member_name]
+  }
+
+  # Bash retry loop. The Fleet RP can lag behind the AKS RP by 30-60s after
+  # a fresh AKS create; without retry, `az fleet member create` returns
+  # DependentResourceNotFound. Additionally, the AKS cluster can be in
+  # `Updating` state for several minutes after the Network Contributor role
+  # assignment on the VNet (granted in modules/terraform/azure/main.tf for the
+  # clustermesh-apiserver internal LB) — `az fleet member create` rejects
+  # with `ManagedClusterNotInExpectedState` until reconciliation finishes.
+  # 60 x 20s = 20 min covers slow Azure days; the happy path exits on the
+  # first attempt (~5s).
+  # Note: the loop retries on any non-zero exit, not only the two errors
+  # named above. `$${delay}` is the Terraform escape for a literal bash
+  # `${delay}`.
+  provisioner "local-exec" {
+    interpreter = ["bash", "-c"]
+    command     = <<-EOT
+      set -euo pipefail
+      cmd='${self.input.create_command}'
+      max=60
+      delay=20
+      for i in $(seq 1 $max); do
+        echo "[$i/$max] $cmd"
+        if eval "$cmd"; then
+          exit 0
+        fi
+        if [ "$i" -lt "$max" ]; then
+          echo "Fleet RP not ready yet, retrying in $${delay}s..."
+          sleep "$delay"
+        fi
+      done
+      echo "az fleet member create failed after $max attempts" >&2
+      exit 1
+    EOT
+  }
+
+  # Best-effort delete: `|| true` keeps a failed member delete from failing
+  # the destroy.
+  provisioner "local-exec" {
+    when        = destroy
+    interpreter = ["bash", "-c"]
+    command     = "${self.input.destroy_command} || true"
+  }
+}
+
+# -----------------------------------------------------------------------------
+# Step 6: ClusterMesh profile (create + apply) via local-exec.
+#
+# Both the create and the destroy commands are stored inside
+# terraform_data.input so the destroy provisioner can reference self.input.*
+# (destroy-time provisioners cannot reference var.* or local.*).
+#
+# Destroy ordering: this resource depends on every fleet member, so on destroy
+# Terraform tears down the profile BEFORE the members (and before the AKS
+# clusters downstream). That matches the source-of-truth teardown: detach the
+# mesh before the clusters disappear, else extension reconciliation hangs.
+# -----------------------------------------------------------------------------
+locals {
+  # Arguments shared by every `az fleet clustermeshprofile <verb>` call below.
+  cmp_target_args = [
+    "--subscription", var.subscription_id,
+    "--resource-group", var.resource_group_name,
+    "--fleet-name", var.fleet_name,
+    "--name", var.cmp_name,
+  ]
+
+  # Each command collapses to a shell no-op when the module is disabled.
+  cmp_create_command = !local.fleet_enabled ? "true" : join(" ", concat(
+    ["az fleet clustermeshprofile create"],
+    local.cmp_target_args,
+    ["--selector", "${var.member_label_key}=${var.member_label_value}"],
+    ["--output", "none"],
+  ))
+
+  cmp_apply_command = !local.fleet_enabled ? "true" : join(" ", concat(
+    ["az fleet clustermeshprofile apply"],
+    local.cmp_target_args,
+    ["--output", "none"],
+  ))
+
+  cmp_destroy_command = !local.fleet_enabled ? "true" : join(" ", concat(
+    ["az fleet clustermeshprofile delete"],
+    local.cmp_target_args,
+    ["--yes"],
+    ["--output", "none"],
+  ))
+
+  # Prints how many fleet members are CURRENTLY APPLIED to the profile (its
+  # reconciled member set, not merely the selector matches). The destroy
+  # provisioner polls this to wait for relabel+apply to drain the set before
+  # it attempts the profile delete. Disabled mode prints 0.
+  cmp_list_applied_count_command = !local.fleet_enabled ? "echo 0" : join(" ", concat(
+    ["az fleet clustermeshprofile list-members"],
+    local.cmp_target_args,
+    ["--query", "'length(@)'"],
+    ["--output", "tsv"],
+  ))
+}
+
+resource "terraform_data" "clustermeshprofile" {
+  count = local.fleet_enabled ? 1 : 0
+
+  # Depending on every member makes Terraform create the profile after the
+  # members and destroy it before them.
+  depends_on = [
+    terraform_data.member,
+  ]
+
+  input = {
+    create_command = local.cmp_create_command
+    apply_command  = local.cmp_apply_command
+    delete_command = local.cmp_destroy_command
+    # `list-members` (default mode) returns members APPLIED to the profile —
+    # the same set the profile-delete API checks. We poll its count to know
+    # when the relabel+apply reconcile has actually drained membership.
+    list_applied_count_command = local.cmp_list_applied_count_command
+    # Pre-built per-member `az fleet member update --labels` commands. Joined
+    # with newlines and embedded in self.input because destroy provisioners
+    # can only access self.input.* (not var.* / local.*).
+    member_relabel_commands = local.fleet_enabled ? join("\n", values(local.member_relabel_command)) : ""
+  }
+
+  # create + apply are two separate az calls. Use bash with `set -euo pipefail`
+  # so any failure aborts the chain.
+  provisioner "local-exec" {
+    interpreter = ["bash", "-c"]
+    command     = "set -euo pipefail; ${self.input.create_command}; ${self.input.apply_command}"
+  }
+
+  # Destroy-time: Fleet's API has a chicken-and-egg between member-delete
+  # and clustermeshprofile-delete:
+  #   - `az fleet member delete` rejects with `MemberBelongsToClusterMesh`
+  #     while the member is still selected by any clustermeshprofile.
+  #   - `az fleet clustermeshprofile delete` rejects with
+  #     `CannotDeleteClusterMeshProfileWithMembers` while any member is
+  #     still in the profile.
+  # The az fleet 2.0.4 extension exposes no first-class detach/remove-member
+  # command. The way out is to UPDATE each member's labels to a value that
+  # the profile selector no longer matches (the profile selects on
+  # `${var.member_label_key}=${var.member_label_value}` from create-time),
+  # then re-`apply` the profile so it reconciles to an empty member set,
+  # then delete the profile. After that the per-member destroy provisioner
+  # on terraform_data.member runs successfully (members are no longer
+  # attached to any profile).
+  #
+  # All steps are best-effort (`|| true` / `exit 0` at the end) so a
+  # partial-state teardown still progresses to RG cleanup.
+  provisioner "local-exec" {
+    when        = destroy
+    interpreter = ["bash", "-c"]
+    command     = <<-EOT
+      set -uo pipefail
+      # 1. Relabel every member off the profile's selector. After this, a
+      # subsequent `apply` will reconcile the profile's member set to empty.
+      printf '%s\n' "${self.input.member_relabel_commands}" | while IFS= read -r cmd; do
+        [ -n "$cmd" ] || continue
+        echo "[relabel-member] $cmd"
+        # stdin is redirected so the child process cannot swallow the
+        # remaining commands still queued on the pipe feeding this loop.
+        eval "$cmd" </dev/null || true
+      done
+
+      # 2. Issue an apply to start the reconcile. apply is async on the Fleet
+      # RP — `az fleet clustermeshprofile apply` returns when the LRO is
+      # accepted, but membership reconciliation (including draining the old
+      # applied set) can lag behind by several minutes.
+      echo "[apply-profile] ${self.input.apply_command}"
+      eval "${self.input.apply_command}" || true
+
+      # 3. Poll the profile's APPLIED member count until it reaches 0. Re-issue
+      # `apply` periodically as a nudge in case the first one was a no-op
+      # (e.g. Fleet RP hadn't yet observed the relabeled members).
+      # Budget: 120 x 5s = 10 min.
+      drained=false
+      for i in $(seq 1 120); do
+        count=$(eval "${self.input.list_applied_count_command}" 2>/dev/null | tr -d '[:space:]')
+        echo "[poll-members] attempt $i/120: applied count='$count'"
+        if [ "$count" = "0" ]; then
+          drained=true
+          break
+        fi
+        # Re-apply every minute (every 12 polls) to push Fleet RP if the
+        # initial apply didn't pick up the relabel.
+        if [ "$i" -gt 1 ] && [ $((i % 12)) -eq 0 ]; then
+          echo "[apply-profile] (nudge) ${self.input.apply_command}"
+          eval "${self.input.apply_command}" || true
+        fi
+        sleep 5
+      done
+      if [ "$drained" != "true" ]; then
+        echo "[poll-members] timed out waiting for applied set to drain; will still attempt delete"
+      fi
+
+      # 4. Delete the profile. Brief retry as a backstop in case there's still
+      # propagation lag between list-members showing 0 and delete being allowed.
+      echo "[delete-profile] ${self.input.delete_command}"
+      for i in $(seq 1 30); do
+        if eval "${self.input.delete_command}"; then
+          echo "[delete-profile] succeeded on attempt $i"
+          exit 0
+        fi
+        if [ "$i" -lt 30 ]; then
+          echo "[delete-profile] retry $i/30 in 5s"
+          sleep 5
+        fi
+      done
+      echo "[delete-profile] gave up after 30 attempts; downstream cleanup will proceed"
+      exit 0
+    EOT
+  }
+}
diff --git a/modules/terraform/azure/fleet/outputs.tf b/modules/terraform/azure/fleet/outputs.tf
new file mode 100644
index 0000000000..04c5ff508e
--- /dev/null
+++ b/modules/terraform/azure/fleet/outputs.tf
@@ -0,0 +1,14 @@
+# Fleet name, blank while the module is disabled.
+output "fleet_name" {
+  value       = var.fleet_enabled ? var.fleet_name : ""
+  description = "Name of the Fleet resource (empty when fleet_enabled=false)."
+}
+
+# ClusterMesh profile name, blank while the module is disabled.
+output "cmp_name" {
+  value       = var.fleet_enabled ? var.cmp_name : ""
+  description = "Name of the ClusterMesh profile (empty when fleet_enabled=false)."
+}
+
+# Fleet-side member names, in input order; empty while the module is disabled.
+output "member_names" {
+  value       = var.fleet_enabled ? var.members[*].member_name : []
+  description = "List of fleet member names created."
+}
diff --git a/modules/terraform/azure/fleet/variables.tf b/modules/terraform/azure/fleet/variables.tf
new file mode 100644
index 0000000000..ee4820e779
--- /dev/null
+++ b/modules/terraform/azure/fleet/variables.tf
@@ -0,0 +1,57 @@
+# Master switch; the module is disabled unless this is set to true.
+variable "fleet_enabled" {
+  description = "Whether to create the Fleet, members, and clustermeshprofile."
+  type        = bool
+  default     = false
+}
+
+# --- Placement: required, no defaults ---------------------------------------
+
+variable "resource_group_name" {
+  description = "Resource group that contains the Fleet and the member AKS clusters."
+  type        = string
+}
+
+variable "location" {
+  description = "Azure region for the Fleet resource."
+  type        = string
+}
+
+variable "subscription_id" {
+  description = "Azure subscription GUID (used to construct AKS resource IDs and CLI calls)."
+  type        = string
+}
+
+# --- Resource names: required, no defaults ----------------------------------
+
+variable "fleet_name" {
+  description = "Name of the Azure Fleet Manager resource."
+  type        = string
+}
+
+variable "cmp_name" {
+  description = "Name of the Fleet ClusterMesh Profile."
+  type        = string
+}
+
+# --- Member label / profile selector (defaults to mesh=true) ----------------
+
+variable "member_label_key" {
+  description = "Label key set on fleet members and used as the clustermeshprofile selector."
+  type        = string
+  default     = "mesh"
+}
+
+variable "member_label_value" {
+  description = "Label value set on fleet members and used as the clustermeshprofile selector."
+  type        = string
+  default     = "true"
+}
+
+# Defaults to an empty list, i.e. no members.
+variable "members" {
+  description = "List of fleet members. aks_name identifies the AKS cluster in the same resource group; member_name is the Fleet-side name (intentionally may differ from aks_name)."
+  type = list(object({
+    member_name = string
+    aks_name    = string
+  }))
+  default = []
+}
+
+variable "tags" {
+  description = "Tags applied to the Fleet resource."
+  type        = map(string)
+  default     = {}
+}
diff --git a/modules/terraform/azure/fleet/versions.tf b/modules/terraform/azure/fleet/versions.tf
new file mode 100644
index 0000000000..71a8e66c18
--- /dev/null
+++ b/modules/terraform/azure/fleet/versions.tf
@@ -0,0 +1,9 @@
+# Version constraints for the fleet submodule.
+terraform {
+  required_version = ">=1.5.6"
+  required_providers {
+    # azapi is pinned to one exact version rather than a range.
+    azapi = {
+      source  = "Azure/azapi"
+      version = "2.8.0"
+    }
+  }
+}
diff --git a/modules/terraform/azure/main.tf b/modules/terraform/azure/main.tf
index ea48654f41..2d04ad1bf4 100644
--- a/modules/terraform/azure/main.tf
+++ b/modules/terraform/azure/main.tf
@@ -320,3 +320,94 @@ module "virtual_machine" {
   # Ensure AKS cluster is created before VM tries to look it up for RBAC
   depends_on = [module.aks, module.aks-cli, module.azapi]
 }
+
+# =============================================================================
+# ClusterMesh add-ons (vnet-peering + fleet + clustermeshprofile).
+#
+# Both are no-ops unless explicitly enabled in their *_config variable. Used
+# today only by the clustermesh-scale scenario.
+# =============================================================================
+
+# Supplies the subscription ID that is passed to the fleet module.
+data "azurerm_client_config" "current" {}
+
+module "vnet_peering" {
+  source = "./vnet-peering"
+
+  resource_group_name = local.run_id
+  # Peering is off unless vnet_peering_config.enabled is set.
+  peering_enabled = try(var.vnet_peering_config.enabled, false)
+
+  # Both maps are keyed by network role.
+  vnet_role_to_id = {
+    for role, network in local.network_config_map :
+    role => module.virtual_network[role].vnet_id
+  }
+  vnet_role_to_name = {
+    for role, network in local.network_config_map :
+    role => network.vnet_name
+  }
+
+  depends_on = [module.virtual_network]
+}
+
+# -----------------------------------------------------------------------------
+# Network Contributor on each member's VNet for the AKS control-plane identity.
+#
+# Required so AKS cloud-controller-manager can provision the
+# clustermesh-apiserver internal LoadBalancer Service. `az aks create`
+# auto-grants the cluster identity Network Contributor on the *node subnet*,
+# but LB provisioning on that subnet additionally needs VNet-level read.
+# Without this grant the Service stays at EXTERNAL-IP=<pending>, the
+# `cilium clustermesh status` CLI fails with "unable to derive service IPs
+# automatically", and the per-agent `cilium-clustermesh` secret is never
+# populated → cilium-dbg reports "ClusterMesh: 0/0 remote clusters ready".
+#
+# Mirrors fleet-setup-script.sh Step 3 (the reference manual setup script).
+# Gated on fleet_config.enabled so non-clustermesh scenarios are unaffected.
+# -----------------------------------------------------------------------------
+locals {
+  # Identity map (aks_role => aks_role) over the configured fleet members,
+  # used as a for_each key set. The filter clause empties it whenever
+  # fleet_config.enabled is not true.
+  clustermesh_member_roles = {
+    for member in try(var.fleet_config.members, []) :
+    member.aks_role => member.aks_role
+    if try(var.fleet_config.enabled, false)
+  }
+}
+
+# Looks up each clustermesh member's AKS cluster so its identity can be read
+# by the role assignment below.
+data "azurerm_kubernetes_cluster" "clustermesh_member" {
+  for_each = local.clustermesh_member_roles
+
+  # each.key is the member's aks_role, used to index aks_cli_config_map.
+  name                = local.aks_cli_config_map[each.key].aks_name
+  resource_group_name = local.run_id
+
+  # aks-cli creates the cluster via local-exec; depends_on defers the data
+  # read until apply time when the cluster actually exists.
+  depends_on = [module.aks-cli]
+}
+
+# Grants each member cluster's identity Network Contributor on the VNet whose
+# network role matches the member's aks_role.
+resource "azurerm_role_assignment" "clustermesh_vnet_contributor" {
+  for_each = local.clustermesh_member_roles
+
+  scope                = module.virtual_network[each.key].vnet_id
+  role_definition_name = "Network Contributor"
+  # NOTE(review): read from the cluster's identity block — confirm this is
+  # populated when managed_identity_name selects a user-assigned identity.
+  principal_id = data.azurerm_kubernetes_cluster.clustermesh_member[each.key].identity[0].principal_id
+
+  # The principal was provisioned together with the cluster in this same
+  # apply; skip the AAD existence check, which can fail on replication lag
+  # for freshly created identities.
+  skip_service_principal_aad_check = true
+}
+
+module "fleet" {
+  source = "./fleet"
+
+  # Every fleet_config read goes through try() with an explicit fallback;
+  # the fallback for `enabled` is false.
+  fleet_enabled       = try(var.fleet_config.enabled, false)
+  resource_group_name = local.run_id
+  location            = local.region
+  subscription_id     = data.azurerm_client_config.current.subscription_id
+  fleet_name          = try(var.fleet_config.fleet_name, "")
+  cmp_name            = try(var.fleet_config.cmp_name, "")
+  member_label_key    = try(var.fleet_config.member_label_key, "mesh")
+  member_label_value  = try(var.fleet_config.member_label_value, "true")
+  # Translate each member's aks_role into the AKS cluster name the submodule
+  # expects.
+  members = [
+    for m in try(var.fleet_config.members, []) : {
+      member_name = m.member_name
+      aks_name    = local.aks_cli_config_map[m.aks_role].aks_name
+    }
+  ]
+  tags = local.tags
+
+  # AKS clusters must exist before we join them as fleet members and apply the
+  # mesh profile. Peering must exist too — apply reaches the mesh-apiserver LB
+  # endpoints cross-cluster, which requires peering (separate-VNet mode).
+  # Network Contributor on each VNet must exist before clustermeshprofile apply
+  # so cloud-controller-manager can provision the apiserver internal LB.
+  depends_on = [
+    module.aks-cli,
+    module.vnet_peering,
+    azurerm_role_assignment.clustermesh_vnet_contributor,
+  ]
+}
diff --git a/modules/terraform/azure/variables.tf b/modules/terraform/azure/variables.tf
index 0c57fc6869..deb028690d 100644
--- a/modules/terraform/azure/variables.tf
+++ b/modules/terraform/azure/variables.tf
@@ -472,6 +472,7 @@ variable "aks_cli_config_list" {
 
     managed_identity_name             = optional(string, null)
     subnet_name                       = optional(string, null)
+    pod_subnet_name                   = optional(string, null)
     kubernetes_version                = optional(string, null)
     aks_custom_headers                = optional(list(string), [])
     use_custom_configurations         = optional(bool, false)
@@ -586,3 +587,32 @@ variable "disk_encryption_set_config_list" {
   }
 }
 
+
+# =============================================================================
+# ClusterMesh additions (optional; used by the clustermesh-scale scenario).
+# Both default to disabled so existing scenarios are unaffected.
+# =============================================================================
+
+# Opt-in switch for the vnet-peering submodule; omitting it leaves peering off.
+variable "vnet_peering_config" {
+  description = "Pairwise VNet peering across all VNets in network_config_list. Keys are stable src_role->dst_role strings so adding a cluster does not churn existing peerings."
+  type = object({
+    enabled = optional(bool, false)
+  })
+  default = {}
+}
+
+# Every attribute is optional, so the default {} yields a disabled config
+# with no members.
+variable "fleet_config" {
+  description = "Azure Fleet + ClusterMesh profile. When enabled, provisions a Fleet resource, one member per entry in members (labeled member_label_key=member_label_value), and creates+applies a clustermeshprofile via local-exec against the private-preview az fleet CLI (see modules/terraform/azure/fleet/)."
+  type = object({
+    enabled            = optional(bool, false)
+    fleet_name         = optional(string, "")
+    cmp_name           = optional(string, "")
+    member_label_key   = optional(string, "mesh")
+    member_label_value = optional(string, "true")
+    # aks_role is the key used to look the cluster up in aks_cli_config_map;
+    # member_name is the Fleet-side name.
+    members = optional(list(object({
+      member_name = string
+      aks_role    = string
+    })), [])
+  })
+  default = {}
+}
diff --git a/modules/terraform/azure/vnet-peering/main.tf b/modules/terraform/azure/vnet-peering/main.tf
new file mode 100644
index 0000000000..20ffa88fbf
--- /dev/null
+++ b/modules/terraform/azure/vnet-peering/main.tf
@@ -0,0 +1,40 @@
+# =============================================================================
+# VNet peering submodule — pairwise mesh
+#
+# Mirrors Step 3b in fleet-setup-script.sh (SHARED_VNET=false mode): one
+# `az network vnet peering create` equivalent per ordered pair (src, dst) with
+# src != dst over var.vnet_role_to_id, i.e. both directions for every VNet pair.
+#
+# for_each keys are the stable string "${src_role}->${dst_role}", so adding a
+# new cluster role does NOT churn peerings that already exist between other pairs.
+# =============================================================================
+
+locals {
+  # One entry per ordered (src, dst) role pair with src != dst, keyed
+  # "src->dst"; empty when peering is disabled.
+  peering_pairs = var.peering_enabled ? {
+    for combo in setproduct(keys(var.vnet_role_to_id), keys(var.vnet_role_to_id)) :
+    "${combo[0]}->${combo[1]}" => {
+      key      = "${combo[0]}->${combo[1]}"
+      src_role = combo[0]
+      dst_role = combo[1]
+      src_id   = var.vnet_role_to_id[combo[0]]
+      dst_id   = var.vnet_role_to_id[combo[1]]
+      src_name = var.vnet_role_to_name[combo[0]]
+    } if combo[0] != combo[1]
+  } : {}
+}
+
+# One peering per entry of local.peering_pairs, created on the source VNet
+# and pointing at the destination VNet.
+resource "azurerm_virtual_network_peering" "peering" {
+  for_each = local.peering_pairs
+
+  # The name combines the source VNet *name* with the destination *role*.
+  name                         = "${each.value.src_name}-to-${each.value.dst_role}"
+  resource_group_name          = var.resource_group_name
+  virtual_network_name         = each.value.src_name
+  remote_virtual_network_id    = each.value.dst_id
+  # Only direct VNet-to-VNet access is enabled; forwarded traffic and gateway
+  # sharing are switched off.
+  allow_virtual_network_access = true
+  allow_forwarded_traffic      = false
+  allow_gateway_transit        = false
+  use_remote_gateways          = false
+}
diff --git a/modules/terraform/azure/vnet-peering/outputs.tf b/modules/terraform/azure/vnet-peering/outputs.tf
new file mode 100644
index 0000000000..d8f9d9f69e
--- /dev/null
+++ b/modules/terraform/azure/vnet-peering/outputs.tf
@@ -0,0 +1,4 @@
+output "peering_keys" {
+  description = "List of peering keys (src_role->dst_role) that were created."
+  value       = keys(azurerm_virtual_network_peering.peering)
+}
diff --git a/modules/terraform/azure/vnet-peering/variables.tf b/modules/terraform/azure/vnet-peering/variables.tf
new file mode 100644
index 0000000000..7aabadcf7b
--- /dev/null
+++ b/modules/terraform/azure/vnet-peering/variables.tf
@@ -0,0 +1,22 @@
+variable "peering_enabled" {
+  description = "Whether to create pairwise VNet peerings between all VNets in vnet_role_to_id."
+  type        = bool
+  default     = false
+}
+
+variable "vnet_role_to_id" {
+  description = "Map of network role => VNet resource ID. Every pair (a, b) with a != b gets two peerings (a->b and b->a)."
+  type        = map(string)
+  default     = {}
+}
+
+variable "vnet_role_to_name" {
+  description = "Map of network role => VNet name. Used to name the peering resource on the source VNet."
+  type        = map(string)
+  default     = {}
+}
+
+variable "resource_group_name" {
+  description = "Resource group containing all VNets."
+  type        = string
+}
diff --git a/pipelines/perf-eval/Network Benchmark/clustermesh-scale.yml b/pipelines/perf-eval/Network Benchmark/clustermesh-scale.yml
new file mode 100644
index 0000000000..caaedc0ea0
--- /dev/null
+++ b/pipelines/perf-eval/Network Benchmark/clustermesh-scale.yml	
@@ -0,0 +1,69 @@
+trigger: none
+
+pool: AKS-Telescope-Airlock
+
+schedules:
+  - cron: "0 4 * * 0"
+    displayName: Weekly Sunday 4am clustermesh scale test
+    branches:
+      include:
+        - main
+    always: false
+
+variables:
+  SCENARIO_TYPE: perf-eval
+  SCENARIO_NAME: clustermesh-scale
+  OWNER: aks
+
+stages:
+  - stage: azure_eastus2euap
+    dependsOn: []
+    jobs:
+      - template: /jobs/competitive-test.yml
+        parameters:
+          cloud: azure
+          regions:
+            - eastus2euap
+          engine: clusterloader2
+          engine_input:
+            image: "ghcr.io/azure/clusterloader2:v20250513"
+            install: false
+            operation_timeout: 15m
+          topology: clustermesh-scale
+          terraform_input_file_mapping:
+            - eastus2euap: "scenarios/perf-eval/clustermesh-scale/terraform-inputs/azure-2.tfvars"
+          matrix:
+            n2:
+              cluster_count: 2
+              mesh_size: 2
+              cl2_config_file: config.yaml
+              test_type: default-config
+              namespaces: 1
+              deployments_per_namespace: 2
+              replicas_per_deployment: 2
+              hold_duration: 30s
+              warmup_duration: 10s
+              restart_count: 0
+              api_server_calls_per_second: 5
+              trigger_reason: ${{ variables['Build.Reason'] }}
+            n2_event_throughput:
+              cluster_count: 2
+              mesh_size: 2
+              cl2_config_file: event-throughput.yaml
+              test_type: event-throughput
+              namespaces: 5
+              deployments_per_namespace: 4
+              replicas_per_deployment: 10
+              hold_duration: 2m
+              warmup_duration: 30s
+              restart_count: 1
+              api_server_calls_per_second: 20
+              trigger_reason: ${{ variables['Build.Reason'] }}
+          max_parallel: 1
+          timeout_in_minutes: 120
+          credential_type: service_connection
+          ssh_key_enabled: false
+          # Iteration-only: skip uploading results to the telescope blob while
+          # we're still stabilizing the clustermesh-scale pipeline. Flip to
+          # false (or remove) once results are meaningful.
+          skip_publish: true
diff --git a/pipelines/system/new-pipeline-test.yml b/pipelines/system/new-pipeline-test.yml
index 63d55f02d9..38ea068658 100644
--- a/pipelines/system/new-pipeline-test.yml
+++ b/pipelines/system/new-pipeline-test.yml
@@ -1,25 +1,66 @@
 trigger: none
 
+pool: AKS-Telescope-Airlock
+
+schedules:
+  - cron: "0 4 * * 0"
+    displayName: Weekly Sunday 4am clustermesh scale test
+    branches:
+      include:
+        - main
+    always: false
+
 variables:
-  SCENARIO_TYPE: <scenario-type>
-  SCENARIO_NAME: <scenario-name>
+  SCENARIO_TYPE: perf-eval
+  SCENARIO_NAME: clustermesh-scale
+  OWNER: aks
 
 stages:
-  - stage: <stage-name> # format: <cloud>[_<region>]+ (e.g. azure_eastus2, aws_eastus_westus)
+  - stage: azure_eastus2euap
     dependsOn: []
     jobs:
-      - template: /jobs/competitive-test.yml # must keep as is
+      - template: /jobs/competitive-test.yml
         parameters:
-          cloud: <cloud> # e.g. azure, aws
-          regions: # list of regions
-            - region1 # e.g. eastus2
-          topology: <topology> # e.g. cluster-autoscaler
-          engine: <engine> # e.g. clusterloader2
-          matrix: # list of test parameters to customize the provisioned resources
-            <case-name>:
-              <key1>: <value1>
-              <key2>: <value2>
-          max_parallel: <number of concurrent jobs> # required
-          credential_type: service_connection # required
+          cloud: azure
+          regions:
+            - eastus2euap
+          engine: clusterloader2
+          engine_input:
+            image: "ghcr.io/azure/clusterloader2:v20250513"
+            install: false
+            operation_timeout: 15m
+          topology: clustermesh-scale
+          terraform_input_file_mapping:
+            - eastus2euap: "scenarios/perf-eval/clustermesh-scale/terraform-inputs/azure-2.tfvars"
+          matrix:
+            # Mirror pipelines/perf-eval/Network Benchmark/clustermesh-scale.yml
+            # so dev runs use the same matrix-var plumbing as production.
+            # Auto-exported as uppercase env vars (NAMESPACES, MESH_SIZE, etc.)
+            # by AzDO and consumed in steps/engine/clusterloader2/clustermesh-scale/execute.yml.
+            #
+            # Production clustermesh-scale.yml also has an `n2` trivial-vertical-slice
+            # entry. We don't run it in dev — n2_event_throughput already exercises
+            # the full plumbing and per-run cost (full Fleet/AKS lifecycle ~15-20 min)
+            # makes a second axis expensive during iteration.
+            n2_event_throughput:
+              cluster_count: 2
+              mesh_size: 2
+              cl2_config_file: event-throughput.yaml
+              test_type: event-throughput
+              namespaces: 5
+              deployments_per_namespace: 4
+              replicas_per_deployment: 10
+              hold_duration: 2m
+              warmup_duration: 30s
+              restart_count: 1
+              api_server_calls_per_second: 20
+              trigger_reason: ${{ variables['Build.Reason'] }}
+          max_parallel: 1
+          timeout_in_minutes: 120
+          credential_type: service_connection
           ssh_key_enabled: false
-          timeout_in_minutes: 60 # if not specified, default is 60
+          # Iteration-only: skip uploading results to the telescope blob while
+          # we're still stabilizing the clustermesh-scale pipeline. Mirrors the
+          # same flag in pipelines/perf-eval/Network Benchmark/clustermesh-scale.yml.
+          # Flip to false (or remove) once results are meaningful.
+          skip_publish: true
diff --git a/scenarios/perf-eval/clustermesh-scale/terraform-inputs/azure-2.tfvars b/scenarios/perf-eval/clustermesh-scale/terraform-inputs/azure-2.tfvars
new file mode 100644
index 0000000000..535bdba5a7
--- /dev/null
+++ b/scenarios/perf-eval/clustermesh-scale/terraform-inputs/azure-2.tfvars
@@ -0,0 +1,179 @@
+scenario_type  = "perf-eval"
+scenario_name  = "clustermesh-scale"
+deletion_delay = "4h"
+owner          = "aks"
+
+# =============================================================================
+# ClusterMesh Scale Test — 2 cluster tier
+#
+# Mirrors fleet-setup-script.sh with SHARED_VNET=false (separate VNets + peering).
+# - 2 VNets (one per cluster) at 10.<id>.0.0/16
+# - Per-cluster node subnet (10.<id>.0.0/24, 251 usable IPs) + pod subnet (10.<id>.4.0/22, 1019 usable IPs; Azure reserves 5 per subnet)
+# - 2 AKS clusters with Cilium + ACNS, Azure CNI w/ pod subnet (not overlay)
+# - Pairwise VNet peering between the two VNets (both directions)
+# - Fleet + 2 fleet members (label mesh=true) + clustermeshprofile
+#
+# Pod subnet sizing: /22 (1019 usable IPs; Azure reserves 5 per subnet) is the
+# floor for any Phase 2 scenario in this tier. Math: ~70 baseline pods
+# (kube-system + AKS add-ons) + 200 workload pods (event-throughput n2 tier:
+# 5 ns x 4 dep x 10 replicas) = ~270 pods/cluster, plus headroom for future
+# churn-stress / HA scenarios without re-touching the network plan. /24 (251 usable IPs) was insufficient.
+# Larger tiers (n5/n10/n20 in Phase 3) will get their own tfvars files with
+# subnets sized for their cluster + pod counts.
+#
+# Naming:
+#   VNet role         : mesh-1, mesh-2                (one VNet per role)
+#   AKS role          : mesh-1, mesh-2                (one AKS per role)
+#   AKS cluster name  : clustermesh-1, clustermesh-2
+#   Fleet member name : mesh-1, mesh-2                (intentionally != cluster name)
+#   Fleet name        : clustermesh-flt
+#   Profile name      : clustermesh-cmp
+# =============================================================================
+
+network_config_list = [
+  {
+    role               = "mesh-1"
+    vnet_name          = "clustermesh-1-vnet"
+    vnet_address_space = "10.1.0.0/16"
+    subnet = [
+      {
+        name           = "clustermesh-1-node"
+        address_prefix = "10.1.0.0/24"
+      },
+      {
+        name           = "clustermesh-1-pod"
+        address_prefix = "10.1.4.0/22"
+      }
+    ]
+    network_security_group_name = ""
+    nic_public_ip_associations  = []
+    nsr_rules                   = []
+  },
+  {
+    role               = "mesh-2"
+    vnet_name          = "clustermesh-2-vnet"
+    vnet_address_space = "10.2.0.0/16"
+    subnet = [
+      {
+        name           = "clustermesh-2-node"
+        address_prefix = "10.2.0.0/24"
+      },
+      {
+        name           = "clustermesh-2-pod"
+        address_prefix = "10.2.4.0/22"
+      }
+    ]
+    network_security_group_name = ""
+    nic_public_ip_associations  = []
+    nsr_rules                   = []
+  }
+]
+
+aks_cli_config_list = [
+  {
+    role                          = "mesh-1"
+    aks_name                      = "clustermesh-1"
+    sku_tier                      = "Standard"
+    subnet_name                   = "clustermesh-1-node"
+    pod_subnet_name               = "clustermesh-1-pod"
+    use_aks_preview_cli_extension = true
+
+    optional_parameters = [
+      { name = "generate-ssh-keys", value = "" },
+      { name = "network-plugin", value = "azure" },
+      { name = "network-dataplane", value = "cilium" },
+      { name = "enable-acns", value = "" },
+      # AKS default is 30 pods/node. Phase-2 event-throughput workload runs
+      # 5ns x 4dep x 10 replicas = 200 pods per cluster; with 2 default-pool
+      # nodes that's 100/node, so we need ≥110 to leave headroom for Cilium
+      # agent, ACNS daemons, monitoring stack, and kube-system pods. Azure
+      # CNI with pod subnet supports up to 250.
+      { name = "max-pods", value = "110" },
+    ]
+
+    # Default pool sizing: D4s_v5 (4 vCPU / 16GB) is enough for the workload
+    # pods alone. Prometheus is pinned to prompool below — without that
+    # split, Prometheus's 1Gi+ memory request co-tenanting on default-pool
+    # nodes caused per-node CPU overcommit (~160% allocatable) and left
+    # workload pods stuck Pending.
+    default_node_pool = {
+      name                 = "default"
+      node_count           = 2
+      auto_scaling_enabled = false
+      vm_size              = "Standard_D4s_v5"
+    }
+    # Dedicated Prometheus node, labeled `prometheus=true`. CL2 is
+    # configured (in modules/python/clusterloader2/clustermesh-scale/scale.py
+    # via CL2_PROMETHEUS_NODE_SELECTOR) to schedule the prometheus-k8s pod
+    # only on this label, so it doesn't compete with workload pods. Mirrors
+    # the `prompool` pattern from
+    # scenarios/perf-eval/cnl-azurecni-overlay-cilium/terraform-inputs/azure.tfvars.
+    # D8s_v3 (8 vCPU / 32GB) is sized for our 1Gi-request Prometheus with
+    # ample headroom — much smaller than #1053's D32s_v5 because our
+    # workload spec is also much smaller.
+    extra_node_pool = [
+      {
+        name                 = "prompool"
+        node_count           = 1
+        auto_scaling_enabled = false
+        vm_size              = "Standard_D8s_v3"
+        optional_parameters = [
+          { name = "labels", value = "prometheus=true" },
+        ]
+      },
+    ]
+  },
+  {
+    role                          = "mesh-2"
+    aks_name                      = "clustermesh-2"
+    sku_tier                      = "Standard"
+    subnet_name                   = "clustermesh-2-node"
+    pod_subnet_name               = "clustermesh-2-pod"
+    use_aks_preview_cli_extension = true
+
+    optional_parameters = [
+      { name = "generate-ssh-keys", value = "" },
+      { name = "network-plugin", value = "azure" },
+      { name = "network-dataplane", value = "cilium" },
+      { name = "enable-acns", value = "" },
+      { name = "max-pods", value = "110" },
+    ]
+
+    default_node_pool = {
+      name                 = "default"
+      node_count           = 2
+      auto_scaling_enabled = false
+      vm_size              = "Standard_D4s_v5"
+    }
+    extra_node_pool = [
+      {
+        name                 = "prompool"
+        node_count           = 1
+        auto_scaling_enabled = false
+        vm_size              = "Standard_D8s_v3"
+        optional_parameters = [
+          { name = "labels", value = "prometheus=true" },
+        ]
+      },
+    ]
+  }
+]
+
+# =============================================================================
+# Fleet + ClusterMesh (new vars in this scenario)
+# =============================================================================
+vnet_peering_config = {
+  enabled = true
+}
+
+fleet_config = {
+  enabled            = true
+  fleet_name         = "clustermesh-flt"
+  cmp_name           = "clustermesh-cmp"
+  member_label_key   = "mesh"
+  member_label_value = "true"
+  members = [
+    { member_name = "mesh-1", aks_role = "mesh-1" },
+    { member_name = "mesh-2", aks_role = "mesh-2" }
+  ]
+}
diff --git a/scenarios/perf-eval/clustermesh-scale/terraform-test-inputs/azure-2.json b/scenarios/perf-eval/clustermesh-scale/terraform-test-inputs/azure-2.json
new file mode 100644
index 0000000000..b2a8243a56
--- /dev/null
+++ b/scenarios/perf-eval/clustermesh-scale/terraform-test-inputs/azure-2.json
@@ -0,0 +1,4 @@
+{
+  "run_id": "cmesh2test",
+  "region": "westus2"
+}
diff --git a/scenarios/perf-eval/clustermesh-scale/vendor/fleet-2.0.4-py3-none-any.whl b/scenarios/perf-eval/clustermesh-scale/vendor/fleet-2.0.4-py3-none-any.whl
new file mode 100644
index 0000000000..68bf9f5746
Binary files /dev/null and b/scenarios/perf-eval/clustermesh-scale/vendor/fleet-2.0.4-py3-none-any.whl differ
diff --git a/steps/engine/clusterloader2/clustermesh-scale/collect.yml b/steps/engine/clusterloader2/clustermesh-scale/collect.yml
new file mode 100644
index 0000000000..6a879a2c58
--- /dev/null
+++ b/steps/engine/clusterloader2/clustermesh-scale/collect.yml
@@ -0,0 +1,88 @@
+parameters:
+  - name: cloud
+    type: string
+    default: ""
+  - name: engine_input
+    type: object
+    default: {}
+  - name: region
+    type: string
+
+steps:
+  - template: /steps/cloud/${{ parameters.cloud }}/collect-cloud-info.yml
+    parameters:
+      region: ${{ parameters.region }}
+
+  - script: |
+      set -eo pipefail
+      set -x
+
+      # Re-export matrix vars under CL2_*/MESH_SIZE/TEST_TYPE names that scale.py
+      # collect expects. Same workaround as execute.yml — matrix-var `$()`
+      # macros don't expand reliably in `env:` blocks.
+      export CL2_NAMESPACES="$NAMESPACES"
+      export CL2_DEPLOYMENTS_PER_NAMESPACE="$DEPLOYMENTS_PER_NAMESPACE"
+      export CL2_REPLICAS_PER_DEPLOYMENT="$REPLICAS_PER_DEPLOYMENT"
+      export MESH_SIZE="${MESH_SIZE:-$CLUSTERMESH_COUNT}"
+      export TEST_TYPE="${TEST_TYPE:-default-config}"
+      export TRIGGER_REASON="${TRIGGER_REASON:-$BUILD_REASON}"
+
+      clusters=$(cat "$HOME/.kube/clustermesh-clusters.json")
+      cluster_count=$(echo "$clusters" | jq 'length')
+
+      # Aggregate every per-cluster JSONL into a single TEST_RESULTS_FILE.
+      # Each line carries `cluster: <role>` so downstream Kusto queries can
+      # group/filter by cluster across the mesh.
+      mkdir -p "$(dirname "$TEST_RESULTS_FILE")"
+      : > "$TEST_RESULTS_FILE"
+
+      for row in $(echo "$clusters" | jq -c '.[]'); do
+        role=$(echo "$row" | jq -r '.role')
+        report_dir="${CL2_REPORT_DIR}/${role}"
+
+        if [ ! -d "$report_dir" ]; then
+          echo "##vso[task.logissue type=warning;] $role: missing report dir $report_dir, skipping"
+          continue
+        fi
+
+        # If CL2 errored out before producing junit.xml (e.g. prometheus stack
+        # setup timeout), skip aggregation for this cluster — scale.py collect
+        # would crash on the missing file. The execute step already logged a
+        # warning per-cluster; we don't want to also abort the whole pipeline
+        # at collect time when partial data may be useful.
+        if [ ! -f "$report_dir/junit.xml" ]; then
+          echo "##vso[task.logissue type=warning;] $role: $report_dir/junit.xml not found (CL2 likely failed); skipping collect for this cluster"
+          continue
+        fi
+
+        per_cluster_result="${TEST_RESULTS_FILE%.*}.${role}.${TEST_RESULTS_FILE##*.}"
+
+        PYTHONPATH=$PYTHONPATH:$(pwd) python3 $PYTHON_SCRIPT_FILE collect \
+          --cl2_report_dir "$report_dir" \
+          --cloud_info "${CLOUD_INFO:-}" \
+          --run_id "$RUN_ID" \
+          --run_url "$RUN_URL" \
+          --result_file "$per_cluster_result" \
+          --start_timestamp "$START_TIME" \
+          --cluster-name "$role" \
+          --cluster-count "$cluster_count" \
+          --mesh-size "$MESH_SIZE" \
+          --test_type "$TEST_TYPE" \
+          --namespaces "$CL2_NAMESPACES" \
+          --deployments-per-namespace "$CL2_DEPLOYMENTS_PER_NAMESPACE" \
+          --replicas-per-deployment "$CL2_REPLICAS_PER_DEPLOYMENT" \
+          --trigger_reason "${TRIGGER_REASON:-}"
+
+        cat "$per_cluster_result" >> "$TEST_RESULTS_FILE"
+      done
+
+      echo "Aggregated results from $cluster_count clusters into $TEST_RESULTS_FILE"
+      wc -l "$TEST_RESULTS_FILE" || true
+    workingDirectory: modules/python
+    env:
+      CLOUD: ${{ parameters.cloud }}
+      RUN_URL: $(RUN_URL)
+      PYTHON_SCRIPT_FILE: $(Pipeline.Workspace)/s/modules/python/clusterloader2/clustermesh-scale/scale.py
+      CL2_REPORT_DIR: $(Pipeline.Workspace)/s/modules/python/clusterloader2/clustermesh-scale/results
+      BUILD_REASON: $(Build.Reason)
+    displayName: "Collect + aggregate results across clustermesh clusters"
diff --git a/steps/engine/clusterloader2/clustermesh-scale/execute.yml b/steps/engine/clusterloader2/clustermesh-scale/execute.yml
new file mode 100644
index 0000000000..cd82bc2d70
--- /dev/null
+++ b/steps/engine/clusterloader2/clustermesh-scale/execute.yml
@@ -0,0 +1,206 @@
+parameters:
+  - name: cloud
+    type: string
+    default: ""
+  - name: engine_input
+    type: object
+    default: {}
+  - name: region
+    type: string
+
+steps:
+  - script: |
+      echo "Set the start time for test execution"
+      startTimestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
+      echo "Start: $startTimestamp"
+      echo "##vso[task.setvariable variable=START_TIME]$startTimestamp"
+    displayName: set up timestamp variable
+
+  - script: |
+      set -eo pipefail
+      set -x
+
+      # Matrix variables (namespaces, mesh_size, deployments_per_namespace,
+      # replicas_per_deployment, hold_duration, warmup_duration, restart_count,
+      # api_server_calls_per_second, test_type) are auto-exported by AzDO to
+      # the script as UPPERCASE env vars (e.g. NAMESPACES, MESH_SIZE). Re-export
+      # them under the CL2_* names that scale.py and the CL2 yaml templates
+      # (config.yaml / event-throughput.yaml) consume.
+      #
+      # Why this re-export rather than `env: CL2_NAMESPACES: $(namespaces)` in
+      # the YAML: AzDO's `$()` runtime macro does not expand matrix variables
+      # in `env:` block values (see prior failed run with literal '$(namespaces)'
+      # reaching python). Same pattern as
+      # steps/engine/clusterloader2/network-scale/execute.yml which references
+      # the auto-exported names directly.
+      export CL2_NAMESPACES="$NAMESPACES"
+      export CL2_DEPLOYMENTS_PER_NAMESPACE="$DEPLOYMENTS_PER_NAMESPACE"
+      export CL2_REPLICAS_PER_DEPLOYMENT="$REPLICAS_PER_DEPLOYMENT"
+      export CL2_API_SERVER_CALLS_PER_SECOND="$API_SERVER_CALLS_PER_SECOND"
+      export CL2_HOLD_DURATION="$HOLD_DURATION"
+      export CL2_WARMUP_DURATION="$WARMUP_DURATION"
+      export CL2_RESTART_GENERATION="$RESTART_COUNT"
+
+      # Same discovery pattern as topology/clustermesh-scale/validate-resources.yml.
+      # We re-run it here rather than relying on a step variable so this engine
+      # file can be invoked independently.
+      clusters=$(az resource list \
+        --resource-type Microsoft.ContainerService/managedClusters \
+        --location "$REGION" \
+        --query "[?tags.run_id=='${RUN_ID}' && starts_with(tags.role, 'mesh-')].{name:name, rg:resourceGroup, role:tags.role}" \
+        -o json)
+
+      cluster_count=$(echo "$clusters" | jq 'length')
+      if [ "$cluster_count" -lt 2 ]; then
+        echo "##vso[task.logissue type=error;] Expected >=2 clustermesh clusters, found $cluster_count"
+        exit 1
+      fi
+
+      echo "Running CL2 across $cluster_count clusters"
+      mkdir -p "$HOME/.kube"
+      echo "$clusters" > "$HOME/.kube/clustermesh-clusters.json"
+      echo "##vso[task.setvariable variable=CLUSTERMESH_COUNT]$cluster_count"
+
+      # CL2 overrides are written once — params are identical for every cluster
+      # in this run (the per-cluster variation is which kubeconfig CL2 hits).
+      PYTHONPATH=$PYTHONPATH:$(pwd) python3 $PYTHON_SCRIPT_FILE configure \
+        --namespaces "$CL2_NAMESPACES" \
+        --deployments-per-namespace "$CL2_DEPLOYMENTS_PER_NAMESPACE" \
+        --replicas-per-deployment "$CL2_REPLICAS_PER_DEPLOYMENT" \
+        --operation-timeout "${CL2_OPERATION_TIMEOUT:-15m}" \
+        --cl2_override_file "${CL2_CONFIG_DIR}/overrides.yaml"
+
+      # Per-cluster CL2 fan-out — sequential. Each invocation writes its own
+      # report dir at ${CL2_REPORT_DIR}/<role>/, so collect.yml can iterate the
+      # same way and tag results with --cluster-name.
+      failures=0
+      for row in $(echo "$clusters" | jq -c '.[]'); do
+        name=$(echo "$row" | jq -r '.name')
+        rg=$(echo "$row"   | jq -r '.rg')
+        role=$(echo "$row" | jq -r '.role')
+
+        echo "===================================================================="
+        echo "  Running CL2 on $role ($name)"
+        echo "===================================================================="
+
+        kubeconfig="$HOME/.kube/$role.config"
+        az aks get-credentials --file "$kubeconfig" \
+          --resource-group "$rg" --name "$name" --overwrite-existing --only-show-errors
+
+        report_dir="${CL2_REPORT_DIR}/${role}"
+        mkdir -p "$report_dir"
+
+        cl2_passed=0
+        # Run CL2; collect outcome WITHOUT failing the bash script (so we can
+        # also inspect junit.xml for internal test failures even when CL2 exits
+        # 0). Treat as "passed" only if BOTH:
+        #   (a) junit.xml exists (CL2 actually completed and wrote a report)
+        #   (b) junit.xml has zero <failure>/<error> elements
+        # Without (b) we'd silently green-light runs where measurements failed
+        # — e.g. PodMonitor template substitution producing "<no value>", which
+        # k8s admission rejects but CL2 still writes junit with <failure> tags.
+        PYTHONPATH=$PYTHONPATH:$(pwd) python3 $PYTHON_SCRIPT_FILE execute \
+            --cl2-image "${CL2_IMAGE}" \
+            --cl2-config-dir "${CL2_CONFIG_DIR}" \
+            --cl2-report-dir "$report_dir" \
+            --cl2-config-file "${CL2_CONFIG_FILE}" \
+            --kubeconfig "$kubeconfig" \
+            --provider "${CLOUD}" \
+          || true
+        if [ -f "$report_dir/junit.xml" ]; then
+          # Sum the failures="N" / errors="M" attrs over every occurrence so a
+          # failure recorded on a later <testsuite> is not missed. awk always
+          # prints a number (0 when the attr is absent), so no fallback is needed.
+          junit_failures=$(awk -F'failures="' '{ for (i = 2; i <= NF; i++) n += $i + 0 } END { print n + 0 }' "$report_dir/junit.xml")
+          junit_errors=$(awk -F'errors="' '{ for (i = 2; i <= NF; i++) n += $i + 0 } END { print n + 0 }' "$report_dir/junit.xml")
+          if [ "$junit_failures" -eq 0 ] && [ "$junit_errors" -eq 0 ]; then
+            cl2_passed=1
+          else
+            echo "##vso[task.logissue type=warning;] $role: junit.xml reports failures=$junit_failures errors=$junit_errors"
+          fi
+        fi
+
+        if [ "$cl2_passed" -eq 1 ]; then
+          echo "  $role: CL2 run succeeded"
+        fi
+
+        # Always-on log capture (spec line 35: "Logs: clustermesh-apiserver,
+        # agent watchers"). Files land in $report_dir/logs/ so they are
+        # uploaded alongside junit.xml + measurement results when the
+        # publish step runs. The same files double as immediate
+        # diagnostics for failed runs (see FAILURE DIAG block below).
+        log_dir="$report_dir/logs"
+        mkdir -p "$log_dir"
+        echo "------- $role: capturing pod logs to $log_dir -------"
+        # clustermesh-apiserver: all three containers (apiserver / etcd /
+        # kvstoremesh) — bounded tail, single pod expected.
+        for c in apiserver etcd kvstoremesh; do
+          KUBECONFIG="$kubeconfig" kubectl -n kube-system logs \
+            -l k8s-app=clustermesh-apiserver -c "$c" --tail=4000 \
+            > "$log_dir/clustermesh-apiserver-$c.log" 2>&1 || true
+        done
+        # cilium-agent: one pod per node — keep tail small to bound size.
+        KUBECONFIG="$kubeconfig" kubectl -n kube-system logs \
+          -l k8s-app=cilium --tail=1000 --prefix=true \
+          > "$log_dir/cilium-agent.log" 2>&1 || true
+        # cilium-operator: low-volume control plane.
+        KUBECONFIG="$kubeconfig" kubectl -n kube-system logs \
+          -l io.cilium/app=operator --tail=2000 --prefix=true \
+          > "$log_dir/cilium-operator.log" 2>&1 || true
+
+        if [ "$cl2_passed" -ne 1 ]; then
+          # Dump enough state to distinguish prometheus-stack scheduling
+          # failures from CL2 logic failures. Prometheus is the most common
+          # culprit here — its pod requests 10Gi by default, doesn't fit on
+          # Standard_D4s_v5. If the pod is Pending with FailedScheduling, the
+          # describe events make that obvious.
+          #
+          # Note: scale.py passes tear_down_prometheus=False so the stack
+          # survives this dump (otherwise CL2 would clean up before we look).
+          echo "------- $role: CL2 FAILURE DIAG -------"
+          echo "------- node allocatable / requested capacity -------"
+          KUBECONFIG="$kubeconfig" kubectl get nodes -o wide 2>&1 || true
+          KUBECONFIG="$kubeconfig" kubectl describe nodes 2>&1 | grep -A 4 "Allocatable\|Allocated resources" | head -40 || true
+
+          echo "------- monitoring/* pods -------"
+          KUBECONFIG="$kubeconfig" kubectl -n monitoring get pods -o wide 2>&1 || true
+
+          echo "------- monitoring statefulsets -------"
+          KUBECONFIG="$kubeconfig" kubectl -n monitoring get statefulset -o wide 2>&1 || true
+
+          echo "------- Prometheus CR (operator input) -------"
+          KUBECONFIG="$kubeconfig" kubectl -n monitoring get prometheus -o yaml 2>&1 | head -80 || true
+
+          echo "------- prometheus-k8s pod describe -------"
+          KUBECONFIG="$kubeconfig" kubectl -n monitoring describe pod -l app.kubernetes.io/name=prometheus 2>&1 | tail -60 || true
+
+          echo "------- prometheus-operator logs (tail 60) -------"
+          KUBECONFIG="$kubeconfig" kubectl -n monitoring logs -l app.kubernetes.io/name=prometheus-operator --tail=60 2>&1 || true
+
+          echo "------- monitoring namespace events (recent) -------"
+          KUBECONFIG="$kubeconfig" kubectl -n monitoring get events --sort-by='.lastTimestamp' 2>&1 | tail -30 || true
+          echo "------- end CL2 FAILURE DIAG -------"
+
+          echo "##vso[task.logissue type=warning;] $role: CL2 run failed (junit missing or has failures/errors at $report_dir/junit.xml; continuing other clusters)"
+          failures=$((failures + 1))
+        fi
+      done
+
+      if [ "$failures" -gt 0 ]; then
+        echo "##vso[task.logissue type=error;] CL2 failed on $failures cluster(s)"
+        exit 1
+      fi
+    workingDirectory: modules/python
+    env:
+      ${{ if eq(parameters.cloud, 'azure') }}:
+        CLOUD: aks
+      ${{ else }}:
+        CLOUD: ${{ parameters.cloud }}
+      REGION: ${{ parameters.region }}
+      PYTHON_SCRIPT_FILE: $(Pipeline.Workspace)/s/modules/python/clusterloader2/clustermesh-scale/scale.py
+      CL2_IMAGE: ${{ parameters.engine_input.image }}
+      CL2_CONFIG_DIR: $(Pipeline.Workspace)/s/modules/python/clusterloader2/clustermesh-scale/config
+      CL2_CONFIG_FILE: $(cl2_config_file)
+      CL2_REPORT_DIR: $(Pipeline.Workspace)/s/modules/python/clusterloader2/clustermesh-scale/results
+      CL2_OPERATION_TIMEOUT: ${{ parameters.engine_input.operation_timeout }}
+    displayName: "Run CL2 across all clustermesh clusters"
diff --git a/steps/setup-tests.yml b/steps/setup-tests.yml
index d790917dca..ed7840dc4c 100644
--- a/steps/setup-tests.yml
+++ b/steps/setup-tests.yml
@@ -72,6 +72,45 @@ steps:
       region: ${{ parameters.region }}
       credential_type: ${{ parameters.credential_type }}
 
+  - script: |
+      # Install the Azure Fleet preview CLI extension required by the
+      # clustermesh-scale scenario. The Fleet ClusterMeshProfile API surface
+      # is private-preview and only the bundled wheel exposes the
+      # `az fleet clustermeshprofile` and `az fleet member create --labels`
+      # commands invoked by terraform local-exec at provision time.
+      #
+      # The wheel is vendored per scenario at scenarios/<SCENARIO_TYPE>/<SCENARIO_NAME>/vendor/ (today: perf-eval/clustermesh-scale).
+      set -euo pipefail
+      whl="$(Pipeline.Workspace)/s/scenarios/$(SCENARIO_TYPE)/$(SCENARIO_NAME)/vendor/fleet-2.0.4-py3-none-any.whl"
+      if [ ! -f "$whl" ]; then
+        echo "##vso[task.logissue type=error;] Vendored fleet wheel not found at $whl"
+        exit 1
+      fi
+      az extension remove --name fleet --only-show-errors 2>/dev/null || true
+      az extension add --source "$whl" --yes --only-show-errors
+      az fleet --help >/dev/null
+      az fleet clustermeshprofile --help >/dev/null
+      echo "Fleet preview CLI installed from $whl"
+    displayName: "Install Fleet preview CLI (clustermesh scenarios)"
+    condition: startsWith(variables['SCENARIO_NAME'], 'clustermesh')
+
+  - script: |
+      # Install cilium-cli (checksum-verified) for richer ClusterMesh diagnostics.
+      # `cilium clustermesh status --context <ctx>` reports per-remote-cluster
+      # connection state, endpoint counts, and version skew — info that the in-pod
+      # `cilium-dbg status` doesn't expose. Used by topology validate-resources.yml.
+      set -euo pipefail
+      CILIUM_CLI_VERSION=v0.16.20
+      CLI_ARCH=amd64
+      curl -sSL --fail --remote-name-all \
+        "https://github.com/cilium/cilium-cli/releases/download/${CILIUM_CLI_VERSION}/cilium-linux-${CLI_ARCH}.tar.gz"{,.sha256sum}
+      sha256sum --check "cilium-linux-${CLI_ARCH}.tar.gz.sha256sum"
+      sudo tar xzvfC "cilium-linux-${CLI_ARCH}.tar.gz" /usr/local/bin
+      rm "cilium-linux-${CLI_ARCH}.tar.gz"{,.sha256sum}
+      cilium version --client
+    displayName: "Install cilium-cli (clustermesh scenarios)"
+    condition: startsWith(variables['SCENARIO_NAME'], 'clustermesh')
+
   - script: |
       if [ -n "${TEST_MODULES_DIR}" ]; then
         test_modules_directory=$(Pipeline.Workspace)/s/${TEST_MODULES_DIR}
diff --git a/steps/topology/clustermesh-scale/collect-clusterloader2.yml b/steps/topology/clustermesh-scale/collect-clusterloader2.yml
new file mode 100644
index 0000000000..29f6c86b38
--- /dev/null
+++ b/steps/topology/clustermesh-scale/collect-clusterloader2.yml
@@ -0,0 +1,18 @@
+parameters:  # Inputs accepted by this wrapper template; all have defaults.
+  - name: cloud  # Forwarded unchanged to the engine collect template.
+    type: string
+    default: ""
+  - name: engine_input  # Forwarded unchanged to the engine collect template.
+    type: object
+    default: {}
+  - name: regions  # Indexed as regions[0] below; NOTE(review): default is a mapping, confirm callers always pass a list.
+    type: object
+    default: {}
+
+steps:
+  - template: /steps/set-run-id.yml  # Included with no parameters, ahead of the collect template.
+  - template: /steps/engine/clusterloader2/clustermesh-scale/collect.yml
+    parameters:
+      cloud: ${{ parameters.cloud }}
+      engine_input: ${{ parameters.engine_input }}
+      region: ${{ parameters.regions[0] }}  # Only the first regions entry is passed on.
diff --git a/steps/topology/clustermesh-scale/execute-clusterloader2.yml b/steps/topology/clustermesh-scale/execute-clusterloader2.yml
new file mode 100644
index 0000000000..eb1f53f7a4
--- /dev/null
+++ b/steps/topology/clustermesh-scale/execute-clusterloader2.yml
@@ -0,0 +1,17 @@
+parameters:  # Inputs accepted by this wrapper template; all have defaults.
+  - name: cloud  # Forwarded unchanged to the engine execute template.
+    type: string
+    default: ""
+  - name: engine_input  # Forwarded unchanged to the engine execute template.
+    type: object
+    default: {}
+  - name: regions  # Indexed as regions[0] below; NOTE(review): default is a mapping, confirm callers always pass a list.
+    type: object
+    default: {}
+
+steps:
+  - template: /steps/engine/clusterloader2/clustermesh-scale/execute.yml
+    parameters:
+      cloud: ${{ parameters.cloud }}
+      engine_input: ${{ parameters.engine_input }}
+      region: ${{ parameters.regions[0] }}  # Only the first regions entry is passed on.
diff --git a/steps/topology/clustermesh-scale/validate-resources.yml b/steps/topology/clustermesh-scale/validate-resources.yml
new file mode 100644
index 0000000000..bfd47a11c6
--- /dev/null
+++ b/steps/topology/clustermesh-scale/validate-resources.yml
@@ -0,0 +1,402 @@
+parameters:  # No defaults are declared for any of the three parameters.
+  - name: cloud  # Not referenced by any step in this template.
+    type: string
+  - name: engine  # Not referenced by any step in this template.
+    type: string
+  - name: regions  # Only regions[0] is read (cluster discovery location filter).
+    type: object
+steps:
+  # -----------------------------------------------------------------------------
+  # Per-cluster validation: enumerate every fleet member, fetch its kubeconfig,
+  # assert nodes are Ready, cilium agent is Running, and the cluster reports
+  # mesh state Connected to all (N-1) remote clusters.
+  # Discovery is tag-based (role=mesh-N, set at terraform-apply time), the same
+  # pattern as /steps/cloud/azure/update-kubeconfig.yml.
+  # -----------------------------------------------------------------------------
+  - script: |
+      set -euo pipefail
+      set -x
+      # The agent also parses ##vso commands out of xtrace lines (the value gets
+      # a trailing quote), so every ##vso echo runs in a subshell with xtrace off.
+
+      region=${{ parameters.regions[0] }}
+
+      # JSON list of {name, rg, role} for every clustermesh AKS cluster in this run.
+      clusters=$(az resource list \
+        --resource-type Microsoft.ContainerService/managedClusters \
+        --location "$region" \
+        --query "[?tags.run_id=='${RUN_ID}' && starts_with(tags.role, 'mesh-')].{name:name, rg:resourceGroup, role:tags.role}" \
+        -o json)
+
+      count=$(echo "$clusters" | jq 'length')
+      if [ "$count" -lt 2 ]; then
+        ( set +x; echo "##vso[task.logissue type=error;] Expected >=2 clustermesh AKS clusters tagged run_id=${RUN_ID}, found $count" ) 2>/dev/null
+        exit 1
+      fi
+
+      echo "Discovered $count clustermesh clusters:"
+      echo "$clusters" | jq -r '.[] | "  \(.role): \(.name) in \(.rg)"'
+
+      mkdir -p "$HOME/.kube"
+      echo "$clusters" > "$HOME/.kube/clustermesh-clusters.json"
+
+      ( set +x; echo "##vso[task.setvariable variable=CLUSTERMESH_COUNT]$count" ) 2>/dev/null
+    displayName: "Enumerate clustermesh clusters"
+
+  - script: |
+      set -euo pipefail
+      set -x
+      # Hard gate: every cluster must report all of its remote clusters ready.
+      # ##vso commands are echoed from a subshell with xtrace off because the
+      # agent also parses them out of xtrace lines, logging each issue twice.
+
+      clusters=$(cat "$HOME/.kube/clustermesh-clusters.json")
+      expected_remote=$(( $(echo "$clusters" | jq 'length') - 1 ))
+
+      failures=0
+      for row in $(echo "$clusters" | jq -c '.[]'); do
+        name=$(echo "$row" | jq -r '.name')
+        rg=$(echo "$row"   | jq -r '.rg')
+        role=$(echo "$row" | jq -r '.role')
+
+        echo "===================================================================="
+        echo "  Validating $role ($name)"
+        echo "===================================================================="
+
+        # Per-cluster kubeconfig at $HOME/.kube/<role>.config (written via --file)
+        # keeps each cluster's auth state isolated so kubectl calls don't race.
+        kubeconfig="$HOME/.kube/$role.config"
+        az aks get-credentials --file "$kubeconfig" \
+          --resource-group "$rg" --name "$name" --overwrite-existing --only-show-errors
+
+        export KUBECONFIG="$kubeconfig"
+
+        echo "--- nodes ---"
+        kubectl get nodes -o wide
+        kubectl wait --for=condition=Ready nodes --all --timeout=5m
+
+        echo "--- cilium agent pods ---"
+        kubectl -n kube-system get pods -l k8s-app=cilium -o wide
+        kubectl -n kube-system rollout status ds/cilium --timeout=5m
+
+        echo "--- clustermesh-apiserver pod ---"
+        kubectl -n kube-system get pods -l k8s-app=clustermesh-apiserver -o wide || true
+
+        # Surface the apiserver/kvstoremesh container ports so we can confirm
+        # the PodMonitor scrape targets (expected: apiserver=9963, kvstoremesh=9964)
+        # without needing to drop into a pod. Informational only — does not gate.
+        echo "--- clustermesh-apiserver exposed ports ---"
+        kubectl -n kube-system get pod -l k8s-app=clustermesh-apiserver \
+          -o jsonpath='{range .items[*].spec.containers[*]}{.name}:{range .ports[*]}{.name}={.containerPort} {end}{"\n"}{end}' \
+          2>/dev/null || true
+        echo
+
+        echo "--- cilium-dbg status (ClusterMesh section) ---"
+        # Gate on the in-pod `cilium-dbg status` (the agent's local view), whose
+        # output includes a "ClusterMesh:" block of the form:
+        #   ClusterMesh:   2/2 remote clusters ready, 0 global-services
+        #      mesh-2: ready, ...
+        # Retry for up to ~10 minutes (60 x 10s) — the AKS-managed Cilium
+        # operator publishes the per-agent `cilium-clustermesh` Secret
+        # asynchronously after Fleet finishes profile apply, and the
+        # clustermesh-apiserver may be recreated mid-validation (cert/config
+        # rotation), bumping the wait another ~30s for agents to reload.
+        # Empirically 5 min was too tight for whichever cluster gets validated
+        # first; 10 min covers it with margin.
+        #
+        # Note: `cilium-dbg status` (in-pod, agent's local view) and
+        # `cilium clustermesh status` (CLI, queries clustermesh-apiserver) can
+        # disagree for several minutes during this window — the CLI flips to
+        # "configured/connected" first because it counts apiserver clients,
+        # while the in-pod view requires the Secret to be reloaded. We gate on
+        # the in-pod view because the data path needs the agent's local state;
+        # the runner-side cilium-cli output is diagnostics only.
+        connected=0
+        for i in $(seq 1 60); do
+          out=$(kubectl -n kube-system exec ds/cilium -- cilium-dbg status 2>&1 || true)
+          echo "$out"
+          # Parse "<ready>/<total> remote clusters ready" line.
+          ready=$(echo "$out" | sed -nE 's/.*ClusterMesh:[[:space:]]+([0-9]+)\/[0-9]+ remote clusters ready.*/\1/p' | head -1)
+          ready=${ready:-0}
+          if [ "$ready" -ge "$expected_remote" ]; then
+            connected=1
+            break
+          fi
+
+          # ============== DEBUG-DUMP-BEGIN (REMOVE BEFORE MERGE) ==============
+          # Every 6 iterations dump richer state: runner-side cilium-cli view of
+          # the mesh, clustermesh-apiserver pod state, and Fleet-side member status.
+          # These help diagnose why convergence is stalling. Strip before final
+          # PR review.
+          if [ "$((i % 6))" -eq 0 ]; then
+            echo "------- [debug] retry $i: cilium clustermesh status (runner cli) -------"
+            cilium clustermesh status --context "$(kubectl config current-context)" --wait=false 2>&1 || true
+
+            echo "------- [debug] retry $i: clustermesh-apiserver pods -------"
+            kubectl -n kube-system get pods -l k8s-app=clustermesh-apiserver -o wide 2>&1 || true
+            kubectl -n kube-system describe pods -l k8s-app=clustermesh-apiserver 2>&1 | tail -40 || true
+
+            echo "------- [debug] retry $i: clustermesh-apiserver service -------"
+            # Service of type LoadBalancer for the clustermesh-apiserver. If
+            # EXTERNAL-IP stays "<pending>", the AKS control-plane identity is
+            # missing Network Contributor on the VNet (cloud-controller-manager
+            # cannot provision the internal LB). Look in describe events for
+            # AuthorizationFailed / forbidden messages.
+            kubectl -n kube-system get svc clustermesh-apiserver -o wide 2>&1 || true
+            kubectl -n kube-system describe svc clustermesh-apiserver 2>&1 | tail -25 || true
+
+            echo "------- [debug] retry $i: cilium agent restarts / readiness -------"
+            kubectl -n kube-system get pods -l k8s-app=cilium -o wide 2>&1 || true
+
+            echo "------- [debug] retry $i: Fleet ClusterMeshProfile profile-level status -------"
+            # Profile-level mesh state (NotConnected/Connecting/Connected/Failed)
+            # plus the last operation error if any. This is the authoritative
+            # control-plane view of whether the mesh has converged.
+            az fleet clustermeshprofile show \
+              --resource-group "$rg" \
+              --fleet-name clustermesh-flt \
+              --name clustermesh-cmp \
+              --query "{state:properties.status.state, provisioningState:properties.provisioningState, lastError:properties.status.lastOperationError}" \
+              -o jsonc 2>&1 || true
+
+            echo "------- [debug] retry $i: Fleet ClusterMeshProfile members (connection state) -------"
+            # Per-member: provisioningState is just ARM-level (join accepted);
+            # meshProperties.status.state is the actual Cilium connection state.
+            az fleet clustermeshprofile list-members \
+              --resource-group "$rg" \
+              --fleet-name clustermesh-flt \
+              --name clustermesh-cmp \
+              --query "[].{name:name, provisioning:properties.provisioningState, mesh:properties.meshProperties.status.state, lastUpdated:properties.meshProperties.status.lastUpdatedAt, error:properties.meshProperties.status.error.message}" \
+              -o table 2>&1 || true
+          fi
+          # =============== DEBUG-DUMP-END (REMOVE BEFORE MERGE) ===============
+
+          echo "  waiting for $expected_remote remote clusters to be ready (got $ready), retry $i/60..."
+          sleep 10
+        done
+
+        if [ "$connected" -ne 1 ]; then
+          ( set +x; echo "##vso[task.logissue type=error;] $role: clustermesh not Connected to $expected_remote remote clusters" ) 2>/dev/null
+          failures=$((failures + 1))
+        fi
+
+        echo "--- cilium clustermesh status (runner-side, richer diagnostics) ---"
+        # Best-effort, informational only — failures here don't fail the step
+        # because the in-pod check above is authoritative. cilium-cli reports
+        # per-remote connection state, endpoint counts, and version info.
+        cilium clustermesh status --context "$(kubectl config current-context)" --wait=false || true
+      done
+
+      if [ "$failures" -gt 0 ]; then
+        ( set +x; echo "##vso[task.logissue type=error;] $failures cluster(s) failed mesh validation" ) 2>/dev/null
+        exit 1
+      fi
+    displayName: "Validate Cilium + ClusterMesh on every cluster"
+
+  - script: |
+      set -euo pipefail
+      set -x
+
+      # Cross-cluster data-path smoke: deploy a `global` service backed by an
+      # echo pod in the first cluster, deploy a curl client in the second
+      # cluster, and curl the service by name. If global service load-balancing
+      # works, the request resolves cross-cluster via the mesh data path.
+      #
+      # Per plan.md Phase 1 exit criteria, we don't ship a "green" Phase 1 that
+      # only validated control plane.
+
+      clusters=$(cat "$HOME/.kube/clustermesh-clusters.json")
+      first_role=$(echo  "$clusters" | jq -r '.[0].role')
+      second_role=$(echo "$clusters" | jq -r '.[1].role')
+
+      kc_first="$HOME/.kube/$first_role.config"
+      kc_second="$HOME/.kube/$second_role.config"
+
+      ns="cm-smoke"
+
+      cleanup() {
+        KUBECONFIG="$kc_first"  kubectl delete ns "$ns" --ignore-not-found --wait=false || true
+        KUBECONFIG="$kc_second" kubectl delete ns "$ns" --ignore-not-found --wait=false || true
+      }
+      trap cleanup EXIT
+
+      cat <<'EOF' > /tmp/cm-smoke-server.yaml
+      apiVersion: v1
+      kind: Namespace
+      metadata:
+        name: cm-smoke
+        annotations:
+          # AKS managed Cilium gates clustermesh sync at the *namespace* level
+          # by default (CFP-39876, "managed Cilium" change). Without this,
+          # neither pod identities, endpoints, nor services in this namespace
+          # are synced across clusters — even with service.cilium.io/global on
+          # the Service. This is the load-bearing annotation here; the
+          # service-level one below is kept for explicitness.
+          clustermesh.cilium.io/global: "true"
+      ---
+      apiVersion: apps/v1
+      kind: Deployment
+      metadata:
+        name: echo
+        namespace: cm-smoke
+      spec:
+        replicas: 1
+        selector:
+          matchLabels: { app: echo }
+        template:
+          metadata:
+            labels: { app: echo }
+          spec:
+            containers:
+              - name: echo
+                image: registry.k8s.io/e2e-test-images/agnhost:2.47
+                args: ["netexec", "--http-port=8080"]
+                ports: [{ containerPort: 8080 }]
+      ---
+      apiVersion: v1
+      kind: Service
+      metadata:
+        name: echo
+        namespace: cm-smoke
+        annotations:
+          # The namespace annotation above is what actually gates sync in AKS
+          # managed Cilium; this service-level annotation is kept for explicit
+          # intent and forward-compatibility.
+          service.cilium.io/global: "true"
+      spec:
+        selector: { app: echo }
+        ports:
+          - port: 80
+            targetPort: 8080
+      EOF
+
+      cat <<'EOF' > /tmp/cm-smoke-client.yaml
+      apiVersion: v1
+      kind: Namespace
+      metadata:
+        name: cm-smoke
+        annotations:
+          clustermesh.cilium.io/global: "true"
+      ---
+      # Cilium global services require the same Service name to exist in every
+      # participating cluster. The Service in cluster 2 has no local backends;
+      # cross-cluster lookup resolves to cluster 1's pods via the mesh.
+      apiVersion: v1
+      kind: Service
+      metadata:
+        name: echo
+        namespace: cm-smoke
+        annotations:
+          service.cilium.io/global: "true"
+      spec:
+        selector: { app: echo }
+        ports:
+          - port: 80
+            targetPort: 8080
+      ---
+      apiVersion: v1
+      kind: Pod
+      metadata:
+        name: curl
+        namespace: cm-smoke
+        labels: { app: curl }
+      spec:
+        restartPolicy: Never
+        containers:
+          - name: curl
+            image: curlimages/curl:8.10.1
+            command: ["sleep", "600"]
+      EOF
+
+      KUBECONFIG="$kc_first"  kubectl apply -f /tmp/cm-smoke-server.yaml
+      KUBECONFIG="$kc_second" kubectl apply -f /tmp/cm-smoke-client.yaml
+
+      KUBECONFIG="$kc_first"  kubectl -n "$ns" rollout status deploy/echo --timeout=3m
+      KUBECONFIG="$kc_second" kubectl -n "$ns" wait --for=condition=Ready pod/curl --timeout=3m
+
+      # Give Cilium clustermesh a moment to sync the new global Service from
+      # cluster 1 → cluster 2 before the first curl attempt. Empirically this
+      # is sub-second once mesh is converged, but we've already paid the cost
+      # of waiting for rollouts above so a small settle here doesn't matter.
+      sleep 15
+
+      # Try for 2 minutes — global service endpoints can take a few seconds
+      # to populate via the mesh.
+      ok=0
+      for i in $(seq 1 24); do
+        if KUBECONFIG="$kc_second" kubectl -n "$ns" exec curl -- \
+            curl -fsS -m 5 http://echo.cm-smoke.svc.cluster.local/hostname; then
+          ok=1
+          echo ""
+          echo "Cross-cluster curl succeeded on attempt $i"
+          break
+        fi
+        echo "  attempt $i/24 failed, retrying in 5s..."
+        sleep 5
+      done
+
+      if [ "$ok" -ne 1 ]; then
+        # ============== SMOKE-FAILURE-DEBUG-DUMP (REMOVE BEFORE MERGE) ==============
+        # On failure, dump enough state to distinguish Cilium global-service
+        # sync issues from cross-VNet pod-IP routing issues. Specifically:
+        #   1. cilium clustermesh status — should show "Global services: 1" if sync OK
+        #   2. cilium-dbg service list (in-pod) — should have an entry for cm-smoke/echo
+        #      with remote-cluster backends in cluster 2
+        #   3. kubectl describe svc / get endpoints echo — k8s view (cluster 2 should
+        #      have NO local endpoints, that's expected)
+        #   4. From inside the curl pod: DNS resolve, then direct-IP curl to a
+        #      cluster-1 echo pod IP — bypasses ClusterIP, tests raw L3 across VNets
+        echo
+        echo "================ SMOKE FAILURE DIAG (cluster $first_role -- backend) ================"
+        KUBECONFIG="$kc_first"  cilium clustermesh status --context "$(KUBECONFIG="$kc_first"  kubectl config current-context)" --wait=false 2>&1 || true
+        KUBECONFIG="$kc_first"  kubectl -n "$ns" describe svc echo 2>&1 || true
+        KUBECONFIG="$kc_first"  kubectl -n "$ns" get endpoints echo -o wide 2>&1 || true
+        KUBECONFIG="$kc_first"  kubectl -n "$ns" get pods -l app=echo -o wide 2>&1 || true
+        echo "------- $first_role: cilium-config (clustermesh-relevant flags) -------"
+        # Authoritative source for whether the cilium agent is configured to
+        # process global services. Look for: enable-cluster-mesh,
+        # cluster-mesh-shared-services, clustermesh-config, identity-allocation-mode,
+        # enable-services. AKS/ACNS may gate global services with a feature flag.
+        KUBECONFIG="$kc_first"  kubectl -n kube-system get cm cilium-config -o yaml 2>&1 \
+          | grep -iE 'cluster-mesh|clustermesh|service|global|identity' || true
+        echo "------- $first_role: cilium-dbg service list (full, head 40) -------"
+        KUBECONFIG="$kc_first"  kubectl -n kube-system exec ds/cilium -c cilium-agent -- cilium-dbg service list 2>&1 | head -40 || true
+        echo "------- $first_role: cilium-operator logs (tail 60) -------"
+        KUBECONFIG="$kc_first"  kubectl -n kube-system logs -l io.cilium/app=operator --tail=60 2>&1 \
+          | grep -iE 'global|clustermesh|cluster-mesh|cm-smoke|service' || true
+
+        echo
+        echo "================ SMOKE FAILURE DIAG (cluster $second_role -- client) ================"
+        KUBECONFIG="$kc_second" cilium clustermesh status --context "$(KUBECONFIG="$kc_second" kubectl config current-context)" --wait=false 2>&1 || true
+        KUBECONFIG="$kc_second" kubectl -n "$ns" describe svc echo 2>&1 || true
+        KUBECONFIG="$kc_second" kubectl -n "$ns" get endpoints echo -o wide 2>&1 || true
+        echo "------- $second_role: cilium-config (clustermesh-relevant flags) -------"
+        KUBECONFIG="$kc_second" kubectl -n kube-system get cm cilium-config -o yaml 2>&1 \
+          | grep -iE 'cluster-mesh|clustermesh|service|global|identity' || true
+        echo "------- $second_role: cilium-dbg service list (full, head 40) -------"
+        KUBECONFIG="$kc_second" kubectl -n kube-system exec ds/cilium -c cilium-agent -- cilium-dbg service list 2>&1 | head -40 || true
+        echo "------- $second_role: cilium-operator logs (tail 60) -------"
+        KUBECONFIG="$kc_second" kubectl -n kube-system logs -l io.cilium/app=operator --tail=60 2>&1 \
+          | grep -iE 'global|clustermesh|cluster-mesh|cm-smoke|service' || true
+
+        echo
+        echo "------- DNS + direct-pod-IP probe from curl pod (bypass ClusterIP) -------"
+        # ClusterIP plumbing is a Cilium-clustermesh concern; direct pod-IP
+        # connectivity is a VNet-peering concern. Hitting a backend pod IP
+        # directly disambiguates the two failure modes.
+        KUBECONFIG="$kc_second" kubectl -n "$ns" exec curl -- nslookup echo.cm-smoke.svc.cluster.local 2>&1 || true
+        backend_ip=$(KUBECONFIG="$kc_first" kubectl -n "$ns" get pod -l app=echo -o jsonpath='{.items[0].status.podIP}' 2>/dev/null || true)
+        echo "first cluster's echo pod IP: ${backend_ip:-<none>}"
+        if [ -n "${backend_ip:-}" ]; then
+          KUBECONFIG="$kc_second" kubectl -n "$ns" exec curl -- \
+            curl -fsS -m 5 "http://${backend_ip}:8080/hostname" 2>&1 || \
+            echo "  direct pod-IP curl ALSO failed → cross-VNet routing issue (peering / pod-CIDR routes)"
+        fi
+        echo "============================ END SMOKE DIAG ============================"
+        # =========================== END SMOKE-FAILURE-DEBUG-DUMP ===========================
+        # Subshell with xtrace off: the agent also parses ##vso out of xtrace lines.
+        ( set +x; echo "##vso[task.logissue type=error;] Cross-cluster data-path smoke failed: $second_role could not reach service in $first_role" ) 2>/dev/null
+        exit 1
+      fi
+    displayName: "Cross-cluster data-path smoke (global service curl)"