Skip to content
Draft

init #11411

Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
d308063
Adding configs and metrics creation
link04 May 7, 2026
5e8567f
Adding test file to check metrics are collected
link04 May 7, 2026
4a2901f
Doing clean up after testing
link04 May 7, 2026
6a04bcc
Merge branch 'master' into maximo/otlp-runtime-metrics
link04 May 8, 2026
953c871
Merge branch 'master' into maximo/otlp-runtime-metrics
link04 May 8, 2026
9759292
move JvmOtlpRuntimeMetrics.java to agent-jmxfetch
mhlidd May 14, 2026
729a87e
prevent JMXFetch from emitting jvm metrics when otlp is enabled; migr…
mhlidd May 14, 2026
5a3a4d1
update JMXFetch to only emit either OTLP or JMX runtime metrics
mhlidd May 14, 2026
8e79c2e
Merge branch 'master' into maximo/otlp-runtime-metrics
mhlidd May 15, 2026
414112d
send otlp_jmx_config when otlp runtime metrics enabled
mhlidd May 15, 2026
4533351
update test to assert on guarantees instead of dependent on GC collec…
mhlidd May 15, 2026
0f455be
Merge branch 'master' into maximo/otlp-runtime-metrics
mhlidd May 15, 2026
25a6396
adding exception handling for callback
mhlidd May 15, 2026
d42101d
Merge remote-tracking branch 'origin/master' into maximo/otlp-runtime…
mcculls May 18, 2026
c13e51e
Minor fixes to use correct storage for observable counters
mcculls May 18, 2026
4f2d638
Cleanup
mcculls May 18, 2026
972848b
Merge branch 'master' into maximo/otlp-runtime-metrics
mhlidd May 18, 2026
96aba06
init
mhlidd May 18, 2026
62d9b50
update checks to match OTel checks
mhlidd May 18, 2026
52e3a26
Merge branch 'master' into mhlidd/otlp_runtime_metrics_follow_up
mhlidd May 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ private static void run(final StatsDClientManager statsDClientManager, final Con
// Register JVM runtime metric callbacks against the OtelMeterProvider so the OTLP
// exporter started by CoreTracer collects them. Started here so it rides the same
// delayed-start path as JMXFetch itself.
JvmOtlpRuntimeMetrics.start();
JvmOtlpRuntimeMetrics.start(config.isMetricsOtelExperimentalEnabled());
// When the OTLP exporter is collecting JVM runtime metrics, skip the default JMXFetch
// JVM config to avoid double-reporting.
defaultConfigs.add(OTLP_JMX_CONFIG);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import static datadog.trace.bootstrap.otel.metrics.OtelInstrumentType.UP_DOWN_COUNTER;

import com.sun.management.OperatingSystemMXBean;
import com.sun.management.UnixOperatingSystemMXBean;
import datadog.trace.bootstrap.otel.api.common.AttributeKey;
import datadog.trace.bootstrap.otel.api.common.Attributes;
import datadog.trace.bootstrap.otel.common.OtelInstrumentationScope;
Expand Down Expand Up @@ -50,8 +51,14 @@ public final class JvmOtlpRuntimeMetrics {

private static final AtomicBoolean started = new AtomicBoolean(false);

/** Registers all JVM runtime metric instruments on the bootstrap-level metric registry. */
public static void start() {
/**
* Registers all JVM runtime metric instruments on the bootstrap-level metric registry.
*
* @param emitExperimentalMetrics when {@code true} (the spec-aligned default), metrics marked as
* <em>Development</em> in the OTel semantic conventions are also registered. When {@code
* false}, only metrics with stable status are emitted.
*/
public static void start(boolean emitExperimentalMetrics) {
if (!started.compareAndSet(false, true)) {
return;
}
Expand All @@ -66,20 +73,30 @@ public static void start() {
((Attributes) attributes)
.forEach((a, v) -> visitor.visitAttribute(a.getType().ordinal(), a.getKey(), v)));

// Stable metrics — always registered.
registerMemoryMetrics();
registerBufferMetrics();
registerThreadMetrics();
registerClassLoadingMetrics();
registerCpuMetrics();
log.debug("Started OTLP runtime metrics with OTel-native naming (jvm.*)");

// Development-status metrics — gated by the experimental flag.
if (emitExperimentalMetrics) {
registerMemoryInitMetric();
registerBufferMetrics();
registerSystemCpuMetrics();
registerFileDescriptorMetrics();
}
log.debug(
"Started OTLP runtime metrics with OTel-native naming (jvm.*), experimental={}",
emitExperimentalMetrics);
} catch (Exception e) {
log.error("Failed to start JVM OTLP runtime metrics", e);
}
}

/**
* jvm.memory.used, jvm.memory.committed, jvm.memory.limit, jvm.memory.init,
* jvm.memory.used_after_last_gc — all UpDownCounter per spec.
* jvm.memory.used, jvm.memory.committed, jvm.memory.limit, jvm.memory.used_after_last_gc — all
* Stable per spec. All UpDownCounter.
*/
private static void registerMemoryMetrics() {
MemoryMXBean memoryBean = ManagementFactory.getMemoryMXBean();
Expand Down Expand Up @@ -118,37 +135,21 @@ private static void registerMemoryMetrics() {
UP_DOWN_COUNTER,
storage -> {
long heapMax = memoryBean.getHeapMemoryUsage().getMax();
if (heapMax > 0) {
if (heapMax != -1) {
storage.recordLong(heapMax, HEAP_ATTRS);
}
long nonHeapMax = memoryBean.getNonHeapMemoryUsage().getMax();
if (nonHeapMax > 0) {
if (nonHeapMax != -1) {
storage.recordLong(nonHeapMax, NON_HEAP_ATTRS);
}
for (MemoryPoolMXBean pool : pools) {
long max = pool.getUsage().getMax();
if (max > 0) {
if (max != -1) {
storage.recordLong(max, poolAttributes(pool));
}
}
});

registerLongObservable(
"jvm.memory.init",
"Measure of initial memory requested.",
"By",
UP_DOWN_COUNTER,
storage -> {
long heapInit = memoryBean.getHeapMemoryUsage().getInit();
if (heapInit > 0) {
storage.recordLong(heapInit, HEAP_ATTRS);
}
long nonHeapInit = memoryBean.getNonHeapMemoryUsage().getInit();
if (nonHeapInit > 0) {
storage.recordLong(nonHeapInit, NON_HEAP_ATTRS);
}
});

registerLongObservable(
"jvm.memory.used_after_last_gc",
"Measure of memory used after the most recent garbage collection event.",
Expand All @@ -164,6 +165,26 @@ private static void registerMemoryMetrics() {
});
}

/**
 * Registers the {@code jvm.memory.init} observable (UpDownCounter, Development status).
 *
 * <p>On each collection the callback reads the initial heap and non-heap sizes from the platform
 * {@link MemoryMXBean} and records them under the heap / non-heap attribute sets. A reading of
 * {@code -1} is not recorded.
 */
private static void registerMemoryInitMetric() {
  final MemoryMXBean memory = ManagementFactory.getMemoryMXBean();
  registerLongObservable(
      "jvm.memory.init",
      "Measure of initial memory requested.",
      "By",
      UP_DOWN_COUNTER,
      storage -> {
        // Heap first, then non-heap; each one is reported only when it is not the -1 sentinel.
        final long initialHeap = memory.getHeapMemoryUsage().getInit();
        if (initialHeap != -1) {
          storage.recordLong(initialHeap, HEAP_ATTRS);
        }
        final long initialNonHeap = memory.getNonHeapMemoryUsage().getInit();
        if (initialNonHeap != -1) {
          storage.recordLong(initialNonHeap, NON_HEAP_ATTRS);
        }
      });
}

/** jvm.buffer.* (UpDownCounter, Development) — direct + mapped pool metrics. */
private static void registerBufferMetrics() {
List<BufferPoolMXBean> bufferPools =
Expand Down Expand Up @@ -234,10 +255,7 @@ private static void registerClassLoadingMetrics() {
* Stable per spec.
*/
private static void registerCpuMetrics() {
java.lang.management.OperatingSystemMXBean rawOsBean =
ManagementFactory.getOperatingSystemMXBean();
OperatingSystemMXBean osBean =
rawOsBean instanceof OperatingSystemMXBean ? (OperatingSystemMXBean) rawOsBean : null;
OperatingSystemMXBean osBean = sunOsBean();

if (osBean != null) {
registerDoubleObservable(
Expand All @@ -263,6 +281,9 @@ private static void registerCpuMetrics() {
storage.recordDouble(cpuLoad, Attributes.empty());
}
});
} else {
log.debug(
"com.sun.management.OperatingSystemMXBean not available; skipping jvm.cpu.time and jvm.cpu.recent_utilization");
}

registerLongObservable(
Expand All @@ -274,6 +295,91 @@ private static void registerCpuMetrics() {
storage.recordLong(Runtime.getRuntime().availableProcessors(), Attributes.empty()));
}

/**
 * Registers the two system-wide CPU gauges, both Development per spec:
 *
 * <ul>
 *   <li>{@code jvm.system.cpu.utilization} — only when the {@code com.sun.management} OS bean is
 *       available; otherwise a debug message is logged and the gauge is skipped.
 *   <li>{@code jvm.system.cpu.load_1m} — always registered, backed by the standard {@code
 *       java.lang.management} OS bean.
 * </ul>
 *
 * <p>Both callbacks record a value only when the reading is {@code >= 0}.
 */
private static void registerSystemCpuMetrics() {
  final OperatingSystemMXBean sunBean = sunOsBean();
  if (sunBean == null) {
    log.debug(
        "com.sun.management.OperatingSystemMXBean not available; skipping jvm.system.cpu.utilization");
  } else {
    registerDoubleObservable(
        "jvm.system.cpu.utilization",
        "Recent CPU utilization for the whole system as reported by the JVM.",
        "1",
        GAUGE,
        storage -> {
          final double systemCpuLoad = sunBean.getSystemCpuLoad();
          if (systemCpuLoad >= 0) {
            storage.recordDouble(systemCpuLoad, Attributes.empty());
          }
        });
  }

  // The load average comes from the standard bean, so it does not depend on the branch above.
  final java.lang.management.OperatingSystemMXBean platformBean =
      ManagementFactory.getOperatingSystemMXBean();
  registerDoubleObservable(
      "jvm.system.cpu.load_1m",
      "Average CPU load of the whole system for the last minute as reported by the JVM.",
      "{run_queue_item}",
      GAUGE,
      storage -> {
        final double loadAverage = platformBean.getSystemLoadAverage();
        if (loadAverage >= 0) {
          storage.recordDouble(loadAverage, Attributes.empty());
        }
      });
}

/**
 * Registers {@code jvm.file_descriptor.count} and {@code jvm.file_descriptor.limit} (both
 * UpDownCounter, both Development per spec).
 *
 * <p>The instruments are registered only when the platform OS bean is a {@link
 * UnixOperatingSystemMXBean}; on any other JVM a debug message is logged and nothing is
 * registered. Negative readings are not recorded.
 */
private static void registerFileDescriptorMetrics() {
  final java.lang.management.OperatingSystemMXBean platformBean =
      ManagementFactory.getOperatingSystemMXBean();
  if (platformBean instanceof UnixOperatingSystemMXBean) {
    final UnixOperatingSystemMXBean unixBean = (UnixOperatingSystemMXBean) platformBean;

    registerLongObservable(
        "jvm.file_descriptor.count",
        "Number of open file descriptors as reported by the JVM.",
        "{file_descriptor}",
        UP_DOWN_COUNTER,
        storage -> {
          final long openDescriptors = unixBean.getOpenFileDescriptorCount();
          if (openDescriptors < 0) {
            return;
          }
          storage.recordLong(openDescriptors, Attributes.empty());
        });

    registerLongObservable(
        "jvm.file_descriptor.limit",
        "Measure of max open file descriptors as reported by the JVM.",
        "{file_descriptor}",
        UP_DOWN_COUNTER,
        storage -> {
          final long maxDescriptors = unixBean.getMaxFileDescriptorCount();
          if (maxDescriptors < 0) {
            return;
          }
          storage.recordLong(maxDescriptors, Attributes.empty());
        });
  } else {
    log.debug(
        "com.sun.management.UnixOperatingSystemMXBean not available (non-Unix JVM); skipping jvm.file_descriptor.count and jvm.file_descriptor.limit");
  }
}

/**
 * Looks up the platform OS bean and narrows it to the {@code com.sun.management} variant.
 *
 * @return the platform bean cast to {@code com.sun.management.OperatingSystemMXBean}, or {@code
 *     null} when this JVM's bean does not implement that interface
 */
private static OperatingSystemMXBean sunOsBean() {
  final java.lang.management.OperatingSystemMXBean platformBean =
      ManagementFactory.getOperatingSystemMXBean();
  if (platformBean instanceof OperatingSystemMXBean) {
    return (OperatingSystemMXBean) platformBean;
  }
  return null;
}

/**
* Registers an UpDownCounter that iterates each platform buffer pool and records {@code getter}
* with the {@code jvm.buffer.pool.name} attribute. Skips negative readings.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;

import com.sun.management.UnixOperatingSystemMXBean;
import datadog.trace.agent.jmxfetch.JvmOtlpRuntimeMetrics;
import datadog.trace.bootstrap.otel.common.OtelInstrumentationScope;
import datadog.trace.bootstrap.otel.metrics.OtelInstrumentDescriptor;
Expand All @@ -15,6 +16,7 @@
import datadog.trace.bootstrap.otlp.metrics.OtlpMetricVisitor;
import datadog.trace.bootstrap.otlp.metrics.OtlpMetricsVisitor;
import datadog.trace.bootstrap.otlp.metrics.OtlpScopedMetricsVisitor;
import java.lang.management.ManagementFactory;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
Expand Down Expand Up @@ -43,7 +45,7 @@ public class JvmOtlpRuntimeMetricsTest {
@BeforeAll
static void setUp() {
System.setProperty("dd.metrics.otel.enabled", "true");
JvmOtlpRuntimeMetrics.start();
JvmOtlpRuntimeMetrics.start(true);
}

@Test
Expand All @@ -67,7 +69,9 @@ void registersExpectedJvmMetrics() {
"jvm.class.unloaded",
"jvm.cpu.time",
"jvm.cpu.count",
"jvm.cpu.recent_utilization");
"jvm.cpu.recent_utilization",
"jvm.system.cpu.utilization",
"jvm.system.cpu.load_1m");

Set<String> names = collector.metricNames;
for (String metric : expectedMetrics) {
Expand All @@ -76,7 +80,18 @@ void registersExpectedJvmMetrics() {
"Expected metric '" + metric + "' not found. Got: " + new TreeSet<>(names));
}

assertEquals(15, names.size(), "Expected 15 metrics, got: " + new TreeSet<>(names));
int expectedSize = expectedMetrics.size();
if (ManagementFactory.getOperatingSystemMXBean() instanceof UnixOperatingSystemMXBean) {
assertTrue(
names.contains("jvm.file_descriptor.count"),
"Expected jvm.file_descriptor.count on Unix. Got: " + new TreeSet<>(names));
assertTrue(
names.contains("jvm.file_descriptor.limit"),
"Expected jvm.file_descriptor.limit on Unix. Got: " + new TreeSet<>(names));
expectedSize += 2;
}

assertEquals(expectedSize, names.size(), "Unexpected metric count: " + new TreeSet<>(names));

// No DD-proprietary names should be present
List<String> ddNames =
Expand Down Expand Up @@ -136,8 +151,8 @@ void startIsIdempotent() {
OtelMetricRegistry.INSTANCE.collectMetrics(before);
int countBefore = before.metricNames.size();

JvmOtlpRuntimeMetrics.start();
JvmOtlpRuntimeMetrics.start();
JvmOtlpRuntimeMetrics.start(true);
JvmOtlpRuntimeMetrics.start(true);

MetricCollector after = new MetricCollector();
OtelMetricRegistry.INSTANCE.collectMetrics(after);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ public final class ConfigDefaults {
static final int DEFAULT_METRICS_OTEL_TIMEOUT = 7_500; // ms
static final int DEFAULT_METRICS_OTEL_CARDINALITY_LIMIT = 2_000;

public static final boolean DEFAULT_METRICS_OTEL_EXPERIMENTAL_ENABLED = true;

public static final int DEFAULT_OTLP_TRACES_TIMEOUT = 10_000; // ms

static final String DEFAULT_OTLP_HTTP_LOGS_ENDPOINT = "v1/logs";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ public final class OtlpConfig {
public static final String METRICS_OTEL_INTERVAL = "metrics.otel.interval";
public static final String METRICS_OTEL_TIMEOUT = "metrics.otel.timeout";
public static final String METRICS_OTEL_CARDINALITY_LIMIT = "metrics.otel.cardinality.limit";
public static final String METRICS_OTEL_EXPERIMENTAL_ENABLED =
"metrics.otel.experimental.enabled";

public static final String OTLP_METRICS_ENDPOINT = "otlp.metrics.endpoint";
public static final String OTLP_METRICS_HEADERS = "otlp.metrics.headers";
Expand Down
13 changes: 13 additions & 0 deletions internal-api/src/main/java/datadog/trace/api/Config.java
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@
import static datadog.trace.api.ConfigDefaults.DEFAULT_LOGS_OTEL_QUEUE_SIZE;
import static datadog.trace.api.ConfigDefaults.DEFAULT_LOGS_OTEL_TIMEOUT;
import static datadog.trace.api.ConfigDefaults.DEFAULT_METRICS_OTEL_CARDINALITY_LIMIT;
import static datadog.trace.api.ConfigDefaults.DEFAULT_METRICS_OTEL_EXPERIMENTAL_ENABLED;
import static datadog.trace.api.ConfigDefaults.DEFAULT_METRICS_OTEL_INTERVAL;
import static datadog.trace.api.ConfigDefaults.DEFAULT_METRICS_OTEL_TIMEOUT;
import static datadog.trace.api.ConfigDefaults.DEFAULT_OTLP_GRPC_PORT;
Expand Down Expand Up @@ -466,6 +467,7 @@
import static datadog.trace.api.config.OtlpConfig.LOGS_OTEL_QUEUE_SIZE;
import static datadog.trace.api.config.OtlpConfig.LOGS_OTEL_TIMEOUT;
import static datadog.trace.api.config.OtlpConfig.METRICS_OTEL_CARDINALITY_LIMIT;
import static datadog.trace.api.config.OtlpConfig.METRICS_OTEL_EXPERIMENTAL_ENABLED;
import static datadog.trace.api.config.OtlpConfig.METRICS_OTEL_EXPORTER;
import static datadog.trace.api.config.OtlpConfig.METRICS_OTEL_INTERVAL;
import static datadog.trace.api.config.OtlpConfig.METRICS_OTEL_TIMEOUT;
Expand Down Expand Up @@ -970,6 +972,7 @@ public static String getHostName() {
private final int metricsOtelInterval;
private final int metricsOtelTimeout;
private final int metricsOtelCardinalityLimit;
private final boolean metricsOtelExperimentalEnabled;
private final String otlpMetricsEndpoint;
private final Map<String, String> otlpMetricsHeaders;
private final OtlpConfig.Protocol otlpMetricsProtocol;
Expand Down Expand Up @@ -2054,6 +2057,10 @@ private Config(final ConfigProvider configProvider, final InstrumenterConfig ins
}
metricsOtelTimeout = otelTimeout;

metricsOtelExperimentalEnabled =
configProvider.getBoolean(
METRICS_OTEL_EXPERIMENTAL_ENABLED, DEFAULT_METRICS_OTEL_EXPERIMENTAL_ENABLED);

// keep OTLP default timeout below the overall export timeout
int defaultOtlpMetricsTimeout = Math.min(metricsOtelTimeout, DEFAULT_METRICS_OTEL_TIMEOUT);
otlpTimeout = configProvider.getInteger(OTLP_METRICS_TIMEOUT, defaultOtlpMetricsTimeout);
Expand Down Expand Up @@ -5479,6 +5486,10 @@ public boolean isMetricsOtlpExporterEnabled() {
return "otlp".equalsIgnoreCase(metricsOtelExporter);
}

/**
 * Reports the resolved {@code metrics.otel.experimental.enabled} setting.
 *
 * @return the value captured in {@code metricsOtelExperimentalEnabled} when this config was built
 */
public boolean isMetricsOtelExperimentalEnabled() {
  return this.metricsOtelExperimentalEnabled;
}

/**
 * Returns the configured OTel metrics cardinality limit.
 *
 * @return the value held in {@code metricsOtelCardinalityLimit}
 */
public int getMetricsOtelCardinalityLimit() {
  return this.metricsOtelCardinalityLimit;
}
Expand Down Expand Up @@ -6601,6 +6612,8 @@ public String toString() {
+ metricsOtelTimeout
+ ", metricsOtelCardinalityLimit="
+ metricsOtelCardinalityLimit
+ ", metricsOtelExperimentalEnabled="
+ metricsOtelExperimentalEnabled
+ ", otlpMetricsEndpoint="
+ otlpMetricsEndpoint
+ ", otlpMetricsHeaders="
Expand Down
Loading