From a7010b24a6feed51f2fa48c6fcd13f99b662210e Mon Sep 17 00:00:00 2001 From: Ken Ahrens Date: Thu, 11 Jun 2026 08:57:50 -0400 Subject: [PATCH 1/2] Fix JVM stability for Java services under continuous sim load Both transactions-service and accounts-service were crashing under continuous simulator traffic with 128m heap inside a 512Mi container. Bump to 384m heap / 1Gi limit. Disable the ErrorSpikeFilter on transactions to get clean captured traffic for mock building. Co-Authored-By: Claude Opus 4.6 --- kubernetes/base/configmaps/app-config.yaml | 1 + .../base/deployments/accounts-service-deployment.yaml | 7 +++---- .../base/deployments/transactions-service-deployment.yaml | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/kubernetes/base/configmaps/app-config.yaml b/kubernetes/base/configmaps/app-config.yaml index c2084db..6353f8d 100644 --- a/kubernetes/base/configmaps/app-config.yaml +++ b/kubernetes/base/configmaps/app-config.yaml @@ -96,6 +96,7 @@ data: DB_SCHEMA: "transactions_service" SERVER_PORT: "8080" SPRING_PROFILES_ACTIVE: "docker" + ERROR_SPIKE_ENABLED: "false" MANAGEMENT_TRACING_ENABLED: "true" MANAGEMENT_ENDPOINT_HEALTH_PROBES_ENABLED: "true" MANAGEMENT_TRACING_SAMPLING_PROBABILITY: "1.0" diff --git a/kubernetes/base/deployments/accounts-service-deployment.yaml b/kubernetes/base/deployments/accounts-service-deployment.yaml index 73c0985..1c398da 100644 --- a/kubernetes/base/deployments/accounts-service-deployment.yaml +++ b/kubernetes/base/deployments/accounts-service-deployment.yaml @@ -44,7 +44,7 @@ spec: name: banking-jwt-secret key: secret - name: JAVA_OPTS - value: "-Xms64m -Xmx128m -XX:MaxMetaspaceSize=128m -XX:ReservedCodeCacheSize=32m -XX:MaxDirectMemorySize=16m -XX:+UseSerialGC -XX:+UseStringDeduplication -XX:+UseContainerSupport -Djava.security.egd=file:/dev/./urandom -Dspring.main.lazy-initialization=true" + value: "-Xms128m -Xmx384m -XX:MaxMetaspaceSize=128m -XX:ReservedCodeCacheSize=32m -XX:MaxDirectMemorySize=16m -XX:+UseSerialGC -XX:+UseStringDeduplication -XX:+UseContainerSupport -Djava.security.egd=file:/dev/./urandom -Dspring.main.lazy-initialization=true" - name: AWS_S3_BUCKET value: "banking-app-statements" - name: AWS_REGION @@ -60,11 +60,10 @@ spec: name: banking-accounts-config resources: requests: - memory: "256Mi" + memory: "384Mi" cpu: "50m" limits: - memory: "512Mi" - # Right-sized for live sim traffic; 150m throttled the JVM (see user-service). + memory: "1Gi" cpu: "1000m" startupProbe: tcpSocket: diff --git a/kubernetes/base/deployments/transactions-service-deployment.yaml b/kubernetes/base/deployments/transactions-service-deployment.yaml index 3501897..35e9547 100644 --- a/kubernetes/base/deployments/transactions-service-deployment.yaml +++ b/kubernetes/base/deployments/transactions-service-deployment.yaml @@ -44,7 +44,7 @@ spec: name: banking-jwt-secret key: secret - name: JAVA_OPTS - value: "-Xms64m -Xmx128m -XX:MaxMetaspaceSize=128m -XX:ReservedCodeCacheSize=32m -XX:MaxDirectMemorySize=16m -XX:+UseSerialGC -XX:+UseStringDeduplication -XX:+UseContainerSupport -Djava.security.egd=file:/dev/./urandom -Dspring.main.lazy-initialization=true" + value: "-Xms128m -Xmx384m -XX:MaxMetaspaceSize=128m -XX:ReservedCodeCacheSize=32m -XX:MaxDirectMemorySize=16m -XX:+UseSerialGC -XX:+UseStringDeduplication -XX:+UseContainerSupport -Djava.security.egd=file:/dev/./urandom -Dspring.main.lazy-initialization=true" envFrom: - configMapRef: name: app-config @@ -52,10 +52,10 @@ spec: name: banking-transactions-config resources: requests: - memory: "256Mi" + memory: "384Mi" cpu: "50m" limits: - memory: "512Mi" + memory: "1Gi" # Right-sized for live sim traffic; 150m throttled the JVM (see user-service). cpu: "1000m" startupProbe: From 60c0b98759ff01011a0018189e7e8c32c42c5113 Mon Sep 17 00:00:00 2001 From: Ken Ahrens Date: Thu, 11 Jun 2026 09:01:38 -0400 Subject: [PATCH 2/2] Add Prometheus scrape target Services for forwarder and nettap The OTel collector scrape config references speedscale-forwarder-metrics and speedscale-nettap-metrics Services on port 4145 but they didn't exist, so all Speedscale infra metrics panels in Grafana were empty. Co-Authored-By: Claude Opus 4.6 --- .../overlays/speedscale/kustomization.yaml | 1 + .../speedscale-metrics-services.yaml | 34 +++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 kubernetes/overlays/speedscale/speedscale-metrics-services.yaml diff --git a/kubernetes/overlays/speedscale/kustomization.yaml b/kubernetes/overlays/speedscale/kustomization.yaml index aa9205f..143892d 100644 --- a/kubernetes/overlays/speedscale/kustomization.yaml +++ b/kubernetes/overlays/speedscale/kustomization.yaml @@ -4,6 +4,7 @@ kind: Kustomization resources: - ../../base - ../../observability +- speedscale-metrics-services.yaml patches: # Add Istio injection and Speedscale labels to the existing banking-app namespace diff --git a/kubernetes/overlays/speedscale/speedscale-metrics-services.yaml b/kubernetes/overlays/speedscale/speedscale-metrics-services.yaml new file mode 100644 index 0000000..8cbea33 --- /dev/null +++ b/kubernetes/overlays/speedscale/speedscale-metrics-services.yaml @@ -0,0 +1,34 @@ +apiVersion: v1 +kind: Service +metadata: + name: speedscale-forwarder-metrics + namespace: speedscale + labels: + app.kubernetes.io/name: speedscale + app.kubernetes.io/component: forwarder-metrics +spec: + selector: + app: speedscale-forwarder + controlplane.speedscale.com/component: forwarder + ports: + - port: 4145 + targetPort: 4145 + protocol: TCP + name: metrics +--- +apiVersion: v1 +kind: Service +metadata: + name: speedscale-nettap-metrics + namespace: speedscale + labels: + app.kubernetes.io/name: speedscale + app.kubernetes.io/component: nettap-metrics +spec: + selector: + app: speedscale-nettap + ports: + - port: 4145 + targetPort: 4145 + protocol: TCP + name: metrics