From 82c7398aad7f2bfefa14231795aa8faeb951ca90 Mon Sep 17 00:00:00 2001 From: Matthew Li Date: Mon, 18 May 2026 17:42:15 -0400 Subject: [PATCH 1/5] init --- .../trace/agent/jmxfetch/JMXFetch.java | 2 +- .../agent/jmxfetch/JvmOtlpRuntimeMetrics.java | 154 +++++++++++++++--- .../metrics/JvmOtlpRuntimeMetricsTest.java | 25 ++- .../datadog/trace/api/ConfigDefaults.java | 2 + .../datadog/trace/api/config/OtlpConfig.java | 2 + .../main/java/datadog/trace/api/Config.java | 13 ++ .../provider/OtelEnvironmentConfigSource.java | 6 + 7 files changed, 174 insertions(+), 30 deletions(-) diff --git a/dd-java-agent/agent-jmxfetch/src/main/java/datadog/trace/agent/jmxfetch/JMXFetch.java b/dd-java-agent/agent-jmxfetch/src/main/java/datadog/trace/agent/jmxfetch/JMXFetch.java index 5375fb8b3e6..a46b1cb81a1 100644 --- a/dd-java-agent/agent-jmxfetch/src/main/java/datadog/trace/agent/jmxfetch/JMXFetch.java +++ b/dd-java-agent/agent-jmxfetch/src/main/java/datadog/trace/agent/jmxfetch/JMXFetch.java @@ -104,7 +104,7 @@ private static void run(final StatsDClientManager statsDClientManager, final Con // Register JVM runtime metric callbacks against the OtelMeterProvider so the OTLP // exporter started by CoreTracer collects them. Started here so it rides the same // delayed-start path as JMXFetch itself. - JvmOtlpRuntimeMetrics.start(); + JvmOtlpRuntimeMetrics.start(config.isMetricsOtelExperimentalEnabled()); // When the OTLP exporter is collecting JVM runtime metrics, skip the default JMXFetch // JVM config to avoid double-reporting. 
defaultConfigs.add(OTLP_JMX_CONFIG); diff --git a/dd-java-agent/agent-jmxfetch/src/main/java/datadog/trace/agent/jmxfetch/JvmOtlpRuntimeMetrics.java b/dd-java-agent/agent-jmxfetch/src/main/java/datadog/trace/agent/jmxfetch/JvmOtlpRuntimeMetrics.java index db6cd431bff..e128021ea55 100644 --- a/dd-java-agent/agent-jmxfetch/src/main/java/datadog/trace/agent/jmxfetch/JvmOtlpRuntimeMetrics.java +++ b/dd-java-agent/agent-jmxfetch/src/main/java/datadog/trace/agent/jmxfetch/JvmOtlpRuntimeMetrics.java @@ -5,6 +5,7 @@ import static datadog.trace.bootstrap.otel.metrics.OtelInstrumentType.UP_DOWN_COUNTER; import com.sun.management.OperatingSystemMXBean; +import com.sun.management.UnixOperatingSystemMXBean; import datadog.trace.bootstrap.otel.api.common.AttributeKey; import datadog.trace.bootstrap.otel.api.common.Attributes; import datadog.trace.bootstrap.otel.common.OtelInstrumentationScope; @@ -50,8 +51,14 @@ public final class JvmOtlpRuntimeMetrics { private static final AtomicBoolean started = new AtomicBoolean(false); - /** Registers all JVM runtime metric instruments on the bootstrap-level metric registry. */ - public static void start() { + /** + * Registers all JVM runtime metric instruments on the bootstrap-level metric registry. + * + * @param emitExperimentalMetrics when {@code true} (the spec-aligned default), metrics marked as + * Development in the OTel semantic conventions are also registered. When {@code + * false}, only metrics with stable status are emitted. + */ + public static void start(boolean emitExperimentalMetrics) { if (!started.compareAndSet(false, true)) { return; } @@ -66,20 +73,30 @@ public static void start() { ((Attributes) attributes) .forEach((a, v) -> visitor.visitAttribute(a.getType().ordinal(), a.getKey(), v))); + // Stable metrics — always registered. 
registerMemoryMetrics(); - registerBufferMetrics(); registerThreadMetrics(); registerClassLoadingMetrics(); registerCpuMetrics(); - log.debug("Started OTLP runtime metrics with OTel-native naming (jvm.*)"); + + // Development-status metrics — gated by the experimental flag. + if (emitExperimentalMetrics) { + registerMemoryInitMetric(); + registerBufferMetrics(); + registerSystemCpuMetrics(); + registerFileDescriptorMetrics(); + } + log.debug( + "Started OTLP runtime metrics with OTel-native naming (jvm.*), experimental={}", + emitExperimentalMetrics); } catch (Exception e) { log.error("Failed to start JVM OTLP runtime metrics", e); } } /** - * jvm.memory.used, jvm.memory.committed, jvm.memory.limit, jvm.memory.init, - * jvm.memory.used_after_last_gc — all UpDownCounter per spec. + * jvm.memory.used, jvm.memory.committed, jvm.memory.limit, jvm.memory.used_after_last_gc — all + * Stable per spec. All UpDownCounter. */ private static void registerMemoryMetrics() { MemoryMXBean memoryBean = ManagementFactory.getMemoryMXBean(); @@ -133,6 +150,24 @@ private static void registerMemoryMetrics() { } }); + registerLongObservable( + "jvm.memory.used_after_last_gc", + "Measure of memory used after the most recent garbage collection event.", + "By", + UP_DOWN_COUNTER, + storage -> { + for (MemoryPoolMXBean pool : pools) { + MemoryUsage collectionUsage = pool.getCollectionUsage(); + if (collectionUsage != null && collectionUsage.getUsed() >= 0) { + storage.recordLong(collectionUsage.getUsed(), poolAttributes(pool)); + } + } + }); + } + + /** jvm.memory.init (UpDownCounter, Development). 
*/ + private static void registerMemoryInitMetric() { + MemoryMXBean memoryBean = ManagementFactory.getMemoryMXBean(); registerLongObservable( "jvm.memory.init", "Measure of initial memory requested.", @@ -148,20 +183,6 @@ private static void registerMemoryMetrics() { storage.recordLong(nonHeapInit, NON_HEAP_ATTRS); } }); - - registerLongObservable( - "jvm.memory.used_after_last_gc", - "Measure of memory used after the most recent garbage collection event.", - "By", - UP_DOWN_COUNTER, - storage -> { - for (MemoryPoolMXBean pool : pools) { - MemoryUsage collectionUsage = pool.getCollectionUsage(); - if (collectionUsage != null && collectionUsage.getUsed() >= 0) { - storage.recordLong(collectionUsage.getUsed(), poolAttributes(pool)); - } - } - }); } /** jvm.buffer.* (UpDownCounter, Development) — direct + mapped pool metrics. */ @@ -234,10 +255,7 @@ private static void registerClassLoadingMetrics() { * Stable per spec. */ private static void registerCpuMetrics() { - java.lang.management.OperatingSystemMXBean rawOsBean = - ManagementFactory.getOperatingSystemMXBean(); - OperatingSystemMXBean osBean = - rawOsBean instanceof OperatingSystemMXBean ? (OperatingSystemMXBean) rawOsBean : null; + OperatingSystemMXBean osBean = sunOsBean(); if (osBean != null) { registerDoubleObservable( @@ -263,6 +281,9 @@ private static void registerCpuMetrics() { storage.recordDouble(cpuLoad, Attributes.empty()); } }); + } else { + log.debug( + "com.sun.management.OperatingSystemMXBean not available; skipping jvm.cpu.time and jvm.cpu.recent_utilization"); } registerLongObservable( @@ -274,6 +295,91 @@ private static void registerCpuMetrics() { storage.recordLong(Runtime.getRuntime().availableProcessors(), Attributes.empty())); } + /** + * jvm.system.cpu.utilization (Gauge) and jvm.system.cpu.load_1m (Gauge) — both Development per + * spec. 
+ */ + private static void registerSystemCpuMetrics() { + OperatingSystemMXBean osBean = sunOsBean(); + if (osBean != null) { + registerDoubleObservable( + "jvm.system.cpu.utilization", + "Recent CPU utilization for the whole system as reported by the JVM.", + "1", + GAUGE, + storage -> { + double load = osBean.getSystemCpuLoad(); + if (load >= 0) { + storage.recordDouble(load, Attributes.empty()); + } + }); + } else { + log.debug( + "com.sun.management.OperatingSystemMXBean not available; skipping jvm.system.cpu.utilization"); + } + + java.lang.management.OperatingSystemMXBean stdOsBean = + ManagementFactory.getOperatingSystemMXBean(); + registerDoubleObservable( + "jvm.system.cpu.load_1m", + "Average CPU load of the whole system for the last minute as reported by the JVM.", + "{run_queue_item}", + GAUGE, + storage -> { + double load = stdOsBean.getSystemLoadAverage(); + if (load >= 0) { + storage.recordDouble(load, Attributes.empty()); + } + }); + } + + /** + * jvm.file_descriptor.count (UpDownCounter) and jvm.file_descriptor.limit (UpDownCounter) — both + * Development per spec. Only registered when the underlying JVM exposes {@link + * UnixOperatingSystemMXBean} (Unix-like platforms). 
+ */ + private static void registerFileDescriptorMetrics() { + java.lang.management.OperatingSystemMXBean rawOsBean = + ManagementFactory.getOperatingSystemMXBean(); + if (!(rawOsBean instanceof UnixOperatingSystemMXBean)) { + log.debug( + "com.sun.management.UnixOperatingSystemMXBean not available (non-Unix JVM); skipping jvm.file_descriptor.count and jvm.file_descriptor.limit"); + return; + } + UnixOperatingSystemMXBean unixOsBean = (UnixOperatingSystemMXBean) rawOsBean; + + registerLongObservable( + "jvm.file_descriptor.count", + "Number of open file descriptors as reported by the JVM.", + "{file_descriptor}", + UP_DOWN_COUNTER, + storage -> { + long count = unixOsBean.getOpenFileDescriptorCount(); + if (count >= 0) { + storage.recordLong(count, Attributes.empty()); + } + }); + + registerLongObservable( + "jvm.file_descriptor.limit", + "Measure of max open file descriptors as reported by the JVM.", + "{file_descriptor}", + UP_DOWN_COUNTER, + storage -> { + long limit = unixOsBean.getMaxFileDescriptorCount(); + if (limit >= 0) { + storage.recordLong(limit, Attributes.empty()); + } + }); + } + + /** Returns the {@code com.sun.management} OS bean if available on this JVM, else {@code null}. */ + private static OperatingSystemMXBean sunOsBean() { + java.lang.management.OperatingSystemMXBean rawOsBean = + ManagementFactory.getOperatingSystemMXBean(); + return rawOsBean instanceof OperatingSystemMXBean ? (OperatingSystemMXBean) rawOsBean : null; + } + /** * Registers an UpDownCounter that iterates each platform buffer pool and records {@code getter} * with the {@code jvm.buffer.pool.name} attribute. Skips negative readings. 
diff --git a/dd-java-agent/instrumentation/opentelemetry/opentelemetry-1.47/src/test/java/opentelemetry147/metrics/JvmOtlpRuntimeMetricsTest.java b/dd-java-agent/instrumentation/opentelemetry/opentelemetry-1.47/src/test/java/opentelemetry147/metrics/JvmOtlpRuntimeMetricsTest.java index 6d220161bee..a408e770975 100644 --- a/dd-java-agent/instrumentation/opentelemetry/opentelemetry-1.47/src/test/java/opentelemetry147/metrics/JvmOtlpRuntimeMetricsTest.java +++ b/dd-java-agent/instrumentation/opentelemetry/opentelemetry-1.47/src/test/java/opentelemetry147/metrics/JvmOtlpRuntimeMetricsTest.java @@ -5,6 +5,7 @@ import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; +import com.sun.management.UnixOperatingSystemMXBean; import datadog.trace.agent.jmxfetch.JvmOtlpRuntimeMetrics; import datadog.trace.bootstrap.otel.common.OtelInstrumentationScope; import datadog.trace.bootstrap.otel.metrics.OtelInstrumentDescriptor; @@ -15,6 +16,7 @@ import datadog.trace.bootstrap.otlp.metrics.OtlpMetricVisitor; import datadog.trace.bootstrap.otlp.metrics.OtlpMetricsVisitor; import datadog.trace.bootstrap.otlp.metrics.OtlpScopedMetricsVisitor; +import java.lang.management.ManagementFactory; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -43,7 +45,7 @@ public class JvmOtlpRuntimeMetricsTest { @BeforeAll static void setUp() { System.setProperty("dd.metrics.otel.enabled", "true"); - JvmOtlpRuntimeMetrics.start(); + JvmOtlpRuntimeMetrics.start(true); } @Test @@ -67,7 +69,9 @@ void registersExpectedJvmMetrics() { "jvm.class.unloaded", "jvm.cpu.time", "jvm.cpu.count", - "jvm.cpu.recent_utilization"); + "jvm.cpu.recent_utilization", + "jvm.system.cpu.utilization", + "jvm.system.cpu.load_1m"); Set names = collector.metricNames; for (String metric : expectedMetrics) { @@ -76,7 +80,18 @@ void registersExpectedJvmMetrics() { "Expected metric '" + metric + "' not found. 
Got: " + new TreeSet<>(names)); } - assertEquals(15, names.size(), "Expected 15 metrics, got: " + new TreeSet<>(names)); + int expectedSize = expectedMetrics.size(); + if (ManagementFactory.getOperatingSystemMXBean() instanceof UnixOperatingSystemMXBean) { + assertTrue( + names.contains("jvm.file_descriptor.count"), + "Expected jvm.file_descriptor.count on Unix. Got: " + new TreeSet<>(names)); + assertTrue( + names.contains("jvm.file_descriptor.limit"), + "Expected jvm.file_descriptor.limit on Unix. Got: " + new TreeSet<>(names)); + expectedSize += 2; + } + + assertEquals(expectedSize, names.size(), "Unexpected metric count: " + new TreeSet<>(names)); // No DD-proprietary names should be present List ddNames = @@ -136,8 +151,8 @@ void startIsIdempotent() { OtelMetricRegistry.INSTANCE.collectMetrics(before); int countBefore = before.metricNames.size(); - JvmOtlpRuntimeMetrics.start(); - JvmOtlpRuntimeMetrics.start(); + JvmOtlpRuntimeMetrics.start(true); + JvmOtlpRuntimeMetrics.start(true); MetricCollector after = new MetricCollector(); OtelMetricRegistry.INSTANCE.collectMetrics(after); diff --git a/dd-trace-api/src/main/java/datadog/trace/api/ConfigDefaults.java b/dd-trace-api/src/main/java/datadog/trace/api/ConfigDefaults.java index 12381a03aa5..a88c00db14e 100644 --- a/dd-trace-api/src/main/java/datadog/trace/api/ConfigDefaults.java +++ b/dd-trace-api/src/main/java/datadog/trace/api/ConfigDefaults.java @@ -114,6 +114,8 @@ public final class ConfigDefaults { static final int DEFAULT_METRICS_OTEL_TIMEOUT = 7_500; // ms static final int DEFAULT_METRICS_OTEL_CARDINALITY_LIMIT = 2_000; + public static final boolean DEFAULT_METRICS_OTEL_EXPERIMENTAL_ENABLED = true; + public static final int DEFAULT_OTLP_TRACES_TIMEOUT = 10_000; // ms static final String DEFAULT_OTLP_HTTP_LOGS_ENDPOINT = "v1/logs"; diff --git a/dd-trace-api/src/main/java/datadog/trace/api/config/OtlpConfig.java b/dd-trace-api/src/main/java/datadog/trace/api/config/OtlpConfig.java index 
46aa07303f2..e3925dbeb40 100644 --- a/dd-trace-api/src/main/java/datadog/trace/api/config/OtlpConfig.java +++ b/dd-trace-api/src/main/java/datadog/trace/api/config/OtlpConfig.java @@ -20,6 +20,8 @@ public final class OtlpConfig { public static final String METRICS_OTEL_INTERVAL = "metrics.otel.interval"; public static final String METRICS_OTEL_TIMEOUT = "metrics.otel.timeout"; public static final String METRICS_OTEL_CARDINALITY_LIMIT = "metrics.otel.cardinality.limit"; + public static final String METRICS_OTEL_EXPERIMENTAL_ENABLED = + "metrics.otel.experimental.enabled"; public static final String OTLP_METRICS_ENDPOINT = "otlp.metrics.endpoint"; public static final String OTLP_METRICS_HEADERS = "otlp.metrics.headers"; diff --git a/internal-api/src/main/java/datadog/trace/api/Config.java b/internal-api/src/main/java/datadog/trace/api/Config.java index a463887f61a..602a5f7da07 100644 --- a/internal-api/src/main/java/datadog/trace/api/Config.java +++ b/internal-api/src/main/java/datadog/trace/api/Config.java @@ -118,6 +118,7 @@ import static datadog.trace.api.ConfigDefaults.DEFAULT_LOGS_OTEL_QUEUE_SIZE; import static datadog.trace.api.ConfigDefaults.DEFAULT_LOGS_OTEL_TIMEOUT; import static datadog.trace.api.ConfigDefaults.DEFAULT_METRICS_OTEL_CARDINALITY_LIMIT; +import static datadog.trace.api.ConfigDefaults.DEFAULT_METRICS_OTEL_EXPERIMENTAL_ENABLED; import static datadog.trace.api.ConfigDefaults.DEFAULT_METRICS_OTEL_INTERVAL; import static datadog.trace.api.ConfigDefaults.DEFAULT_METRICS_OTEL_TIMEOUT; import static datadog.trace.api.ConfigDefaults.DEFAULT_OTLP_GRPC_PORT; @@ -466,6 +467,7 @@ import static datadog.trace.api.config.OtlpConfig.LOGS_OTEL_QUEUE_SIZE; import static datadog.trace.api.config.OtlpConfig.LOGS_OTEL_TIMEOUT; import static datadog.trace.api.config.OtlpConfig.METRICS_OTEL_CARDINALITY_LIMIT; +import static datadog.trace.api.config.OtlpConfig.METRICS_OTEL_EXPERIMENTAL_ENABLED; import static datadog.trace.api.config.OtlpConfig.METRICS_OTEL_EXPORTER; 
import static datadog.trace.api.config.OtlpConfig.METRICS_OTEL_INTERVAL; import static datadog.trace.api.config.OtlpConfig.METRICS_OTEL_TIMEOUT; @@ -970,6 +972,7 @@ public static String getHostName() { private final int metricsOtelInterval; private final int metricsOtelTimeout; private final int metricsOtelCardinalityLimit; + private final boolean metricsOtelExperimentalEnabled; private final String otlpMetricsEndpoint; private final Map otlpMetricsHeaders; private final OtlpConfig.Protocol otlpMetricsProtocol; @@ -2054,6 +2057,10 @@ private Config(final ConfigProvider configProvider, final InstrumenterConfig ins } metricsOtelTimeout = otelTimeout; + metricsOtelExperimentalEnabled = + configProvider.getBoolean( + METRICS_OTEL_EXPERIMENTAL_ENABLED, DEFAULT_METRICS_OTEL_EXPERIMENTAL_ENABLED); + // keep OTLP default timeout below the overall export timeout int defaultOtlpMetricsTimeout = Math.min(metricsOtelTimeout, DEFAULT_METRICS_OTEL_TIMEOUT); otlpTimeout = configProvider.getInteger(OTLP_METRICS_TIMEOUT, defaultOtlpMetricsTimeout); @@ -5479,6 +5486,10 @@ public boolean isMetricsOtlpExporterEnabled() { return "otlp".equalsIgnoreCase(metricsOtelExporter); } + public boolean isMetricsOtelExperimentalEnabled() { + return metricsOtelExperimentalEnabled; + } + public int getMetricsOtelCardinalityLimit() { return metricsOtelCardinalityLimit; } @@ -6601,6 +6612,8 @@ public String toString() { + metricsOtelTimeout + ", metricsOtelCardinalityLimit=" + metricsOtelCardinalityLimit + + ", metricsOtelExperimentalEnabled=" + + metricsOtelExperimentalEnabled + ", otlpMetricsEndpoint=" + otlpMetricsEndpoint + ", otlpMetricsHeaders=" diff --git a/utils/config-utils/src/main/java/datadog/trace/bootstrap/config/provider/OtelEnvironmentConfigSource.java b/utils/config-utils/src/main/java/datadog/trace/bootstrap/config/provider/OtelEnvironmentConfigSource.java index 522808170d2..4d923dead01 100644 --- 
a/utils/config-utils/src/main/java/datadog/trace/bootstrap/config/provider/OtelEnvironmentConfigSource.java +++ b/utils/config-utils/src/main/java/datadog/trace/bootstrap/config/provider/OtelEnvironmentConfigSource.java @@ -17,6 +17,7 @@ import static datadog.trace.api.config.OtlpConfig.LOGS_OTEL_TIMEOUT; import static datadog.trace.api.config.OtlpConfig.METRICS_OTEL_CARDINALITY_LIMIT; import static datadog.trace.api.config.OtlpConfig.METRICS_OTEL_ENABLED; +import static datadog.trace.api.config.OtlpConfig.METRICS_OTEL_EXPERIMENTAL_ENABLED; import static datadog.trace.api.config.OtlpConfig.METRICS_OTEL_EXPORTER; import static datadog.trace.api.config.OtlpConfig.METRICS_OTEL_INTERVAL; import static datadog.trace.api.config.OtlpConfig.METRICS_OTEL_TIMEOUT; @@ -195,6 +196,11 @@ private void setupMetricsOtelEnvironment() { METRICS_OTEL_CARDINALITY_LIMIT, getOtelProperty( "otel.java.metrics.cardinality.limit", "dd." + METRICS_OTEL_CARDINALITY_LIMIT)); + capture( + METRICS_OTEL_EXPERIMENTAL_ENABLED, + getOtelProperty( + "otel.instrumentation.runtime-telemetry.emit-experimental-metrics", + "dd." 
+ METRICS_OTEL_EXPERIMENTAL_ENABLED)); String exporter = getOtelProperty("otel.metrics.exporter"); if (exporter == null || "otlp".equalsIgnoreCase(exporter)) { // metrics defaults to OTLP From b2a3ff67e4858a1ae3345a289c36d7f9cd8046e1 Mon Sep 17 00:00:00 2001 From: Matthew Li Date: Mon, 18 May 2026 17:57:17 -0400 Subject: [PATCH 2/5] update checks to match OTel checks --- .../trace/agent/jmxfetch/JvmOtlpRuntimeMetrics.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dd-java-agent/agent-jmxfetch/src/main/java/datadog/trace/agent/jmxfetch/JvmOtlpRuntimeMetrics.java b/dd-java-agent/agent-jmxfetch/src/main/java/datadog/trace/agent/jmxfetch/JvmOtlpRuntimeMetrics.java index e128021ea55..efee922934b 100644 --- a/dd-java-agent/agent-jmxfetch/src/main/java/datadog/trace/agent/jmxfetch/JvmOtlpRuntimeMetrics.java +++ b/dd-java-agent/agent-jmxfetch/src/main/java/datadog/trace/agent/jmxfetch/JvmOtlpRuntimeMetrics.java @@ -135,16 +135,16 @@ private static void registerMemoryMetrics() { UP_DOWN_COUNTER, storage -> { long heapMax = memoryBean.getHeapMemoryUsage().getMax(); - if (heapMax > 0) { + if (heapMax != -1) { storage.recordLong(heapMax, HEAP_ATTRS); } long nonHeapMax = memoryBean.getNonHeapMemoryUsage().getMax(); - if (nonHeapMax > 0) { + if (nonHeapMax != -1) { storage.recordLong(nonHeapMax, NON_HEAP_ATTRS); } for (MemoryPoolMXBean pool : pools) { long max = pool.getUsage().getMax(); - if (max > 0) { + if (max != -1) { storage.recordLong(max, poolAttributes(pool)); } } @@ -175,11 +175,11 @@ private static void registerMemoryInitMetric() { UP_DOWN_COUNTER, storage -> { long heapInit = memoryBean.getHeapMemoryUsage().getInit(); - if (heapInit > 0) { + if (heapInit != -1) { storage.recordLong(heapInit, HEAP_ATTRS); } long nonHeapInit = memoryBean.getNonHeapMemoryUsage().getInit(); - if (nonHeapInit > 0) { + if (nonHeapInit != -1) { storage.recordLong(nonHeapInit, NON_HEAP_ATTRS); } }); From cbdabdde1d8e5944e7610d5852537155fae44107 Mon Sep 17 
00:00:00 2001 From: Matthew Li Date: Wed, 20 May 2026 13:44:28 -0400 Subject: [PATCH 3/5] adding jvm.gc.duration --- .../agent/jmxfetch/JvmOtlpRuntimeMetrics.java | 106 ++++++++++++++++++ .../metrics/JvmOtlpRuntimeMetricsTest.java | 32 +++++- 2 files changed, 137 insertions(+), 1 deletion(-) diff --git a/dd-java-agent/agent-jmxfetch/src/main/java/datadog/trace/agent/jmxfetch/JvmOtlpRuntimeMetrics.java b/dd-java-agent/agent-jmxfetch/src/main/java/datadog/trace/agent/jmxfetch/JvmOtlpRuntimeMetrics.java index efee922934b..908ccaa5273 100644 --- a/dd-java-agent/agent-jmxfetch/src/main/java/datadog/trace/agent/jmxfetch/JvmOtlpRuntimeMetrics.java +++ b/dd-java-agent/agent-jmxfetch/src/main/java/datadog/trace/agent/jmxfetch/JvmOtlpRuntimeMetrics.java @@ -2,8 +2,10 @@ import static datadog.trace.bootstrap.otel.metrics.OtelInstrumentType.COUNTER; import static datadog.trace.bootstrap.otel.metrics.OtelInstrumentType.GAUGE; +import static datadog.trace.bootstrap.otel.metrics.OtelInstrumentType.HISTOGRAM; import static datadog.trace.bootstrap.otel.metrics.OtelInstrumentType.UP_DOWN_COUNTER; +import com.sun.management.GarbageCollectionNotificationInfo; import com.sun.management.OperatingSystemMXBean; import com.sun.management.UnixOperatingSystemMXBean; import datadog.trace.bootstrap.otel.api.common.AttributeKey; @@ -17,17 +19,24 @@ import datadog.trace.bootstrap.otel.metrics.data.OtelRunnableObservable; import java.lang.management.BufferPoolMXBean; import java.lang.management.ClassLoadingMXBean; +import java.lang.management.GarbageCollectorMXBean; import java.lang.management.ManagementFactory; import java.lang.management.MemoryMXBean; import java.lang.management.MemoryPoolMXBean; import java.lang.management.MemoryUsage; import java.lang.management.ThreadMXBean; +import java.util.Arrays; import java.util.List; import java.util.Locale; import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Consumer; import java.util.function.Function; import 
java.util.function.ToLongFunction; +import javax.management.Notification; +import javax.management.NotificationEmitter; +import javax.management.NotificationFilter; +import javax.management.NotificationListener; +import javax.management.openmbean.CompositeData; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -46,9 +55,17 @@ public final class JvmOtlpRuntimeMetrics { AttributeKey.stringKey("jvm.memory.pool.name"); private static final AttributeKey BUFFER_POOL = AttributeKey.stringKey("jvm.buffer.pool.name"); + private static final AttributeKey GC_NAME = AttributeKey.stringKey("jvm.gc.name"); + private static final AttributeKey GC_ACTION = AttributeKey.stringKey("jvm.gc.action"); + private static final AttributeKey GC_CAUSE = AttributeKey.stringKey("jvm.gc.cause"); private static final Attributes HEAP_ATTRS = Attributes.of(MEMORY_TYPE, "heap"); private static final Attributes NON_HEAP_ATTRS = Attributes.of(MEMORY_TYPE, "non_heap"); + /** Explicit bucket advice for jvm.gc.duration in seconds (matches OTel runtime-telemetry). */ + private static final List GC_DURATION_BUCKETS = Arrays.asList(0.01, 0.1, 1.0, 10.0); + + private static final String GC_NOTIFICATION_TYPE = "com.sun.management.gc.notification"; + private static final AtomicBoolean started = new AtomicBoolean(false); /** @@ -78,6 +95,7 @@ public static void start(boolean emitExperimentalMetrics) { registerThreadMetrics(); registerClassLoadingMetrics(); registerCpuMetrics(); + registerGcDurationMetric(emitExperimentalMetrics); // Development-status metrics — gated by the experimental flag. if (emitExperimentalMetrics) { @@ -295,6 +313,79 @@ private static void registerCpuMetrics() { storage.recordLong(Runtime.getRuntime().availableProcessors(), Attributes.empty())); } + /** + * jvm.gc.duration (Histogram, Stable) — synchronous; recorded from a JMX notification listener + * attached to each {@link GarbageCollectorMXBean} when the JVM completes a GC. + * + *

The {@code jvm.gc.cause} attribute is gated on {@code captureGcCause} because cause is not + * part of the stable attribute set in the OTel semantic conventions. + */ + private static void registerGcDurationMetric(boolean captureGcCause) { + if (!isGcNotificationInfoAvailable()) { + log.debug( + "com.sun.management.GarbageCollectionNotificationInfo not available; skipping jvm.gc.duration"); + return; + } + OtelMetricStorage storage = + registerDoubleHistogramStorage( + "jvm.gc.duration", + "Duration of JVM garbage collection actions.", + "s", + GC_DURATION_BUCKETS); + NotificationFilter filter = n -> GC_NOTIFICATION_TYPE.equals(n.getType()); + GcNotificationListener listener = new GcNotificationListener(storage, captureGcCause); + for (GarbageCollectorMXBean bean : ManagementFactory.getGarbageCollectorMXBeans()) { + if (bean instanceof NotificationEmitter) { + ((NotificationEmitter) bean).addNotificationListener(listener, filter, null); + } + } + } + + private static boolean isGcNotificationInfoAvailable() { + try { + Class.forName( + "com.sun.management.GarbageCollectionNotificationInfo", + false, + GarbageCollectorMXBean.class.getClassLoader()); + return true; + } catch (ClassNotFoundException e) { + return false; + } + } + + private static void recordGcDuration( + OtelMetricStorage storage, GarbageCollectionNotificationInfo info, boolean captureGcCause) { + double durationSeconds = info.getGcInfo().getDuration() / 1000d; + Attributes attrs = + captureGcCause + ? Attributes.of( + GC_NAME, info.getGcName(), + GC_ACTION, info.getGcAction(), + GC_CAUSE, info.getGcCause()) + : Attributes.of( + GC_NAME, info.getGcName(), + GC_ACTION, info.getGcAction()); + storage.recordDouble(durationSeconds, attrs); + } + + /** Listener fired by the JVM on the JMX notification thread when a GC completes. 
*/ + static final class GcNotificationListener implements NotificationListener { + private final OtelMetricStorage storage; + private final boolean captureGcCause; + + GcNotificationListener(OtelMetricStorage storage, boolean captureGcCause) { + this.storage = storage; + this.captureGcCause = captureGcCause; + } + + @Override + public void handleNotification(Notification notification, Object handback) { + GarbageCollectionNotificationInfo info = + GarbageCollectionNotificationInfo.from((CompositeData) notification.getUserData()); + recordGcDuration(storage, info, captureGcCause); + } + } + /** * jvm.system.cpu.utilization (Gauge) and jvm.system.cpu.load_1m (Gauge) — both Development per * spec. @@ -438,6 +529,21 @@ private static void registerObservable( JVM_SCOPE, new OtelRunnableObservable(() -> callback.accept(storage))); } + /** + * Registers a synchronous double histogram against the bootstrap registry and returns its storage + * so callers can record values directly (e.g. from a JMX notification listener). + */ + private static OtelMetricStorage registerDoubleHistogramStorage( + String name, String description, String unit, List bucketBoundaries) { + OtelInstrumentBuilder builder = OtelInstrumentBuilder.ofDoubles(name, HISTOGRAM); + builder.setDescription(description); + builder.setUnit(unit); + return OtelMetricRegistry.INSTANCE.registerStorage( + JVM_SCOPE, + builder.descriptor(), + descriptor -> OtelMetricStorage.newHistogramStorage(descriptor, bucketBoundaries)); + } + /** Registers metric storage for the instrument against the bootstrap registry. 
*/ private static OtelMetricStorage registerStorage(OtelInstrumentDescriptor descriptor) { Function storageFactory; diff --git a/dd-java-agent/instrumentation/opentelemetry/opentelemetry-1.47/src/test/java/opentelemetry147/metrics/JvmOtlpRuntimeMetricsTest.java b/dd-java-agent/instrumentation/opentelemetry/opentelemetry-1.47/src/test/java/opentelemetry147/metrics/JvmOtlpRuntimeMetricsTest.java index a408e770975..720a65bd723 100644 --- a/dd-java-agent/instrumentation/opentelemetry/opentelemetry-1.47/src/test/java/opentelemetry147/metrics/JvmOtlpRuntimeMetricsTest.java +++ b/dd-java-agent/instrumentation/opentelemetry/opentelemetry-1.47/src/test/java/opentelemetry147/metrics/JvmOtlpRuntimeMetricsTest.java @@ -71,7 +71,8 @@ void registersExpectedJvmMetrics() { "jvm.cpu.count", "jvm.cpu.recent_utilization", "jvm.system.cpu.utilization", - "jvm.system.cpu.load_1m"); + "jvm.system.cpu.load_1m", + "jvm.gc.duration"); Set names = collector.metricNames; for (String metric : expectedMetrics) { @@ -145,6 +146,35 @@ void jvmThreadCountIsPositive() { "jvm.thread.count value should be positive, got " + threadPoints.get(0).value); } + @Test + void jvmGcDurationRecordsDataPointsAfterGc() throws InterruptedException { + // Force a GC; the JMX NotificationListener should observe the event and record a data + // point onto the jvm.gc.duration histogram. + System.gc(); + + // JMX delivers the notification on the JVM's internal notification thread, so we have + // to poll briefly. Two seconds is generous — delivery is typically sub-50ms. 
+ List points = null; + long deadlineNanos = System.nanoTime() + java.util.concurrent.TimeUnit.SECONDS.toNanos(2); + while (System.nanoTime() < deadlineNanos) { + MetricCollector collector = new MetricCollector(); + OtelMetricRegistry.INSTANCE.collectMetrics(collector); + points = collector.points.get("jvm.gc.duration"); + if (points != null && !points.isEmpty()) { + break; + } + Thread.sleep(50); + } + + assertNotNull(points, "jvm.gc.duration should have data points after System.gc()"); + assertFalse(points.isEmpty(), "jvm.gc.duration should have at least one data point"); + assertTrue( + points.stream() + .allMatch( + p -> p.attrs.containsKey("jvm.gc.name") && p.attrs.containsKey("jvm.gc.action")), + "Every jvm.gc.duration data point should carry jvm.gc.name and jvm.gc.action attributes"); + } + @Test void startIsIdempotent() { MetricCollector before = new MetricCollector(); From de166ab4c829781bb50efa047b057096098568ea Mon Sep 17 00:00:00 2001 From: Matthew Li Date: Wed, 20 May 2026 13:45:42 -0400 Subject: [PATCH 4/5] adding tests for experimental off --- .../JvmOtlpRuntimeMetricsForkedTest.java | 107 ++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 dd-java-agent/instrumentation/opentelemetry/opentelemetry-1.47/src/test/java/opentelemetry147/metrics/JvmOtlpRuntimeMetricsForkedTest.java diff --git a/dd-java-agent/instrumentation/opentelemetry/opentelemetry-1.47/src/test/java/opentelemetry147/metrics/JvmOtlpRuntimeMetricsForkedTest.java b/dd-java-agent/instrumentation/opentelemetry/opentelemetry-1.47/src/test/java/opentelemetry147/metrics/JvmOtlpRuntimeMetricsForkedTest.java new file mode 100644 index 00000000000..e4beefeab46 --- /dev/null +++ b/dd-java-agent/instrumentation/opentelemetry/opentelemetry-1.47/src/test/java/opentelemetry147/metrics/JvmOtlpRuntimeMetricsForkedTest.java @@ -0,0 +1,107 @@ +package opentelemetry147.metrics; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static 
org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import datadog.trace.agent.jmxfetch.JvmOtlpRuntimeMetrics;
+import datadog.trace.bootstrap.otel.metrics.data.OtelMetricRegistry;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.concurrent.TimeUnit;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Forked test: runs in an isolated JVM and starts {@code JvmOtlpRuntimeMetrics} with the
+ * experimental flag OFF, verifying that Development-status instruments are not registered and that
+ * the {@code jvm.gc.cause} attribute is omitted from {@code jvm.gc.duration} data points.
+ *
+ * <p>{@code JvmOtlpRuntimeMetrics} uses a one-shot {@code AtomicBoolean} to guard registration, so
+ * this scenario must run in its own JVM, separate from the always-on {@code
+ * JvmOtlpRuntimeMetricsTest}.
+ */
+class JvmOtlpRuntimeMetricsForkedTest {
+
+  /** Upper bound, in nanoseconds, on how long the GC test polls the registry for data points. */
+  private static final long GC_POLL_TIMEOUT_NANOS = TimeUnit.SECONDS.toNanos(2);
+
+  /** Pause between two consecutive polls of the registry, in milliseconds. */
+  private static final long GC_POLL_INTERVAL_MILLIS = 50;
+
+  /**
+   * Sets the {@code dd.metrics.otel.enabled} system property to {@code "true"} and starts {@code
+   * JvmOtlpRuntimeMetrics} with {@code emitExperimentalMetrics == false}.
+   */
+  @BeforeAll
+  static void setUp() {
+    System.setProperty("dd.metrics.otel.enabled", "true");
+    JvmOtlpRuntimeMetrics.start(false);
+  }
+
+  /**
+   * Checks the collected metric names: every stable metric listed in the test must be present and
+   * every Development-status metric listed in the test must be absent.
+   */
+  @Test
+  void registersOnlyStableMetricsWhenExperimentalDisabled() {
+    Set<String> names = collectSnapshot().metricNames;
+
+    List<String> expectedStableMetrics =
+        Arrays.asList(
+            "jvm.memory.used",
+            "jvm.memory.committed",
+            "jvm.memory.limit",
+            "jvm.memory.used_after_last_gc",
+            "jvm.thread.count",
+            "jvm.class.loaded",
+            "jvm.class.count",
+            "jvm.class.unloaded",
+            "jvm.cpu.time",
+            "jvm.cpu.count",
+            "jvm.cpu.recent_utilization",
+            "jvm.gc.duration");
+    for (String metric : expectedStableMetrics) {
+      assertTrue(
+          names.contains(metric),
+          "Expected stable metric '" + metric + "' not found. Got: " + new TreeSet<>(names));
+    }
+
+    List<String> developmentMetrics =
+        Arrays.asList(
+            "jvm.memory.init",
+            "jvm.buffer.memory.used",
+            "jvm.buffer.memory.limit",
+            "jvm.buffer.count",
+            "jvm.system.cpu.utilization",
+            "jvm.system.cpu.load_1m",
+            "jvm.file_descriptor.count",
+            "jvm.file_descriptor.limit");
+    for (String metric : developmentMetrics) {
+      assertFalse(
+          names.contains(metric),
+          "Development metric '"
+              + metric
+              + "' should not be registered when experimental disabled. Got: "
+              + new TreeSet<>(names));
+    }
+  }
+
+  /**
+   * Requests a GC, polls the registry until {@code jvm.gc.duration} has at least one data point
+   * (or the poll timeout elapses), then checks that every data point carries {@code jvm.gc.name}
+   * and {@code jvm.gc.action} but not {@code jvm.gc.cause}.
+   *
+   * @throws InterruptedException if the thread is interrupted while sleeping between polls
+   */
+  @Test
+  void jvmGcDurationDataPointsOmitGcCauseWhenExperimentalDisabled() throws InterruptedException {
+    System.gc();
+
+    // NOTE(review): element type assumed to be JvmOtlpRuntimeMetricsTest.DataPointEntry (the
+    // generic arguments were lost in this copy of the patch); confirm against MetricCollector.
+    List<JvmOtlpRuntimeMetricsTest.DataPointEntry> points = null;
+    long deadlineNanos = System.nanoTime() + GC_POLL_TIMEOUT_NANOS;
+    // Compare via subtraction: System.nanoTime() values may wrap, so "now < deadline" is unsafe.
+    while (System.nanoTime() - deadlineNanos < 0) {
+      points = collectSnapshot().points.get("jvm.gc.duration");
+      if (points != null && !points.isEmpty()) {
+        break;
+      }
+      Thread.sleep(GC_POLL_INTERVAL_MILLIS);
+    }
+
+    assertNotNull(points, "jvm.gc.duration should have data points after System.gc()");
+    assertFalse(points.isEmpty(), "jvm.gc.duration should have at least one data point");
+    assertTrue(
+        points.stream()
+            .allMatch(
+                p ->
+                    p.attrs.containsKey("jvm.gc.name")
+                        && p.attrs.containsKey("jvm.gc.action")
+                        && !p.attrs.containsKey("jvm.gc.cause")),
+        "jvm.gc.duration data points must carry jvm.gc.name and jvm.gc.action, but not jvm.gc.cause"
+            + " when experimental disabled");
+  }
+
+  /** Runs one collection pass over {@code OtelMetricRegistry.INSTANCE} into a fresh collector. */
+  private static JvmOtlpRuntimeMetricsTest.MetricCollector collectSnapshot() {
+    JvmOtlpRuntimeMetricsTest.MetricCollector collector =
+        new JvmOtlpRuntimeMetricsTest.MetricCollector();
+    OtelMetricRegistry.INSTANCE.collectMetrics(collector);
+    return collector;
+  }
+}
From 40ef357305c5db6ced58227b7c89ae5c39c6008b Mon Sep 17 00:00:00 2001
From: Matthew Li
Date: Wed, 20 May 2026 15:19:16 -0400
Subject: [PATCH 5/5] removing unnecessary test and adding configs

---
 .../metrics/JvmOtlpRuntimeMetricsTest.java | 17 -----------------
 metadata/supported-configurations.json | 16 ++++++++++++++++
 2 files changed, 16 insertions(+), 17 deletions(-)

diff --git 
a/dd-java-agent/instrumentation/opentelemetry/opentelemetry-1.47/src/test/java/opentelemetry147/metrics/JvmOtlpRuntimeMetricsTest.java b/dd-java-agent/instrumentation/opentelemetry/opentelemetry-1.47/src/test/java/opentelemetry147/metrics/JvmOtlpRuntimeMetricsTest.java index 720a65bd723..efd5f695786 100644 --- a/dd-java-agent/instrumentation/opentelemetry/opentelemetry-1.47/src/test/java/opentelemetry147/metrics/JvmOtlpRuntimeMetricsTest.java +++ b/dd-java-agent/instrumentation/opentelemetry/opentelemetry-1.47/src/test/java/opentelemetry147/metrics/JvmOtlpRuntimeMetricsTest.java @@ -175,23 +175,6 @@ void jvmGcDurationRecordsDataPointsAfterGc() throws InterruptedException { "Every jvm.gc.duration data point should carry jvm.gc.name and jvm.gc.action attributes"); } - @Test - void startIsIdempotent() { - MetricCollector before = new MetricCollector(); - OtelMetricRegistry.INSTANCE.collectMetrics(before); - int countBefore = before.metricNames.size(); - - JvmOtlpRuntimeMetrics.start(true); - JvmOtlpRuntimeMetrics.start(true); - - MetricCollector after = new MetricCollector(); - OtelMetricRegistry.INSTANCE.collectMetrics(after); - assertEquals( - countBefore, - after.metricNames.size(), - "Repeated start() must not register duplicate instruments"); - } - static final class DataPointEntry { final Map attrs; final Number value; diff --git a/metadata/supported-configurations.json b/metadata/supported-configurations.json index 01087b11aa4..f166f3f5d6e 100644 --- a/metadata/supported-configurations.json +++ b/metadata/supported-configurations.json @@ -2417,6 +2417,14 @@ "aliases": [] } ], + "DD_METRICS_OTEL_EXPERIMENTAL_ENABLED": [ + { + "version": "A", + "type": "boolean", + "default": "true", + "aliases": [] + } + ], "DD_OBFUSCATION_QUERY_STRING_REGEXP": [ { "version": "A", @@ -11489,6 +11497,14 @@ "aliases": [] } ], + "OTEL_INSTRUMENTATION_RUNTIME_TELEMETRY_EMIT_EXPERIMENTAL_METRICS": [ + { + "version": "A", + "type": "string", + "default": null, + "aliases": [] + } + ], 
"OTEL_JAVAAGENT_CONFIGURATION_FILE": [ { "version": "A",