From 1d9d59500d2cda86ba31f2019cdd92a3ba85011c Mon Sep 17 00:00:00 2001 From: Sandi Fatic <3193842+chefsale@users.noreply.github.com> Date: Fri, 22 May 2026 11:27:31 +0200 Subject: [PATCH] relay: tune libp2p relay::Config for production-grade workloads The boot-node was running with libp2p-relay's `Config::default()` for every field except `max_circuit_bytes` (which had already been bumped from 128 KiB to 100 MiB). The remaining defaults are sized for the libp2p test-suite, not for a multi-user network, and they are visible in clients' reservation-accept logs as: Limit { duration: Some(120s), data_in_bytes: Some(104857600) } ^^^^^^^^^^^^^^^^^^^ libp2p default max_circuit_duration = 2 * 60 s A 2-minute per-circuit lifetime is short enough that gossipsub mesh formation, state-delta sync, and any non-trivial application stream get cut mid-flight. The 4-circuits-per-peer cap further limits how many distinct remote peers a single client can talk to through this relay simultaneously. Set the following on top of `Config::default()`: max_circuit_bytes = 1 GiB (was 100 MiB) max_circuit_duration = 1 hour (was 2 minutes) max_circuits = 4096 (was 16) max_circuits_per_peer = 256 (was 4) max_reservations = 2048 (was 128) max_reservations_per_peer = 8 (was 4) These values size a single relay instance for ~200 active users holding circuits to ~20 distinct peers each, with sync/state-delta streams allowed to run for an hour and up to 1 GiB per circuit. The EC2 instance hosting the relay should be at least `c7g.large` (2 vCPU / 4 GiB RAM) to provide enough memory for per-circuit state at these caps; the current `c7g.medium` (1 vCPU / 2 GiB) will be tight under the new totals. Reservation/circuit rate-limiters remain at libp2p defaults (30/peer/2min and 60/IP/min). Those weren't the binding constraint in the failure-mode logs that motivated this change, and leaving them in place keeps a sensible abuse-prevention floor. Bumps version to 0.8.0 for the AMI / Terraform pin to track. 
Related: calimero-network/infrastructure#89 (fleet expansion + EC2 sizing), calimero-network/core#2438 (client-side complement: dial throttling, address filtering, AutoNATv2 candidate filtering). Co-Authored-By: Claude Opus 4.7 (1M context) --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/main.rs | 23 ++++++++++++++++++++++- 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5b2c508..0368e45 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -473,7 +473,7 @@ dependencies = [ [[package]] name = "boot-node" -version = "0.7.0" +version = "0.8.0" dependencies = [ "axum", "calimero-network-primitives", diff --git a/Cargo.toml b/Cargo.toml index 7cfa982..4518871 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "boot-node" -version = "0.7.0" +version = "0.8.0" authors = ["Calimero Limited "] edition = "2021" repository = "https://github.com/calimero-network/boot-node" diff --git a/src/main.rs b/src/main.rs index 5444bf2..67b5439 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,5 @@ use std::net::Ipv4Addr; +use std::time::Duration; use clap::Parser; use libp2p::futures::prelude::*; @@ -15,7 +16,22 @@ mod http_service; const PROTOCOL_VERSION: &str = concat!("/", env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION")); const CALIMERO_KAD_PROTO_NAME: StreamProtocol = StreamProtocol::new("/calimero/kad/1.0.0"); -const MAX_RELAY_CIRCUIT_BYTES: u64 = 100 << 20; // 100 MiB + +// libp2p `relay::Config::default()` ships with values sized for the test suite, +// not production: 2-min circuits, 128 KiB per circuit, 16 total circuits, 4 +// circuits-per-peer, 128 total reservations, 4 reservations-per-peer. The +// per-peer caps in particular bottleneck NAT-bound clients in any non-trivial +// network. The overrides below size each relay node for ~200 active users +// holding circuits to ~20 distinct peers each, with sync/state-delta streams +// allowed to run for an hour and up to 1 GiB per circuit. 
Tune in lockstep +// with the EC2 instance size — these settings expect at least `c7g.large` +// (2 vCPU / 4 GiB) so the per-circuit state fits. +const MAX_RELAY_CIRCUIT_BYTES: u64 = 1 << 30; // 1 GiB +const MAX_RELAY_CIRCUIT_DURATION: Duration = Duration::from_secs(60 * 60); // 1 hour +const MAX_RELAY_CIRCUITS: usize = 4096; +const MAX_RELAY_CIRCUITS_PER_PEER: usize = 256; +const MAX_RELAY_RESERVATIONS: usize = 2048; +const MAX_RELAY_RESERVATIONS_PER_PEER: usize = 8; #[derive(NetworkBehaviour)] struct Behaviour { @@ -132,6 +148,11 @@ async fn main() -> eyre::Result<()> { relay: relay::Behaviour::new(keypair.public().to_peer_id(), { let mut x = relay::Config::default(); x.max_circuit_bytes = MAX_RELAY_CIRCUIT_BYTES; + x.max_circuit_duration = MAX_RELAY_CIRCUIT_DURATION; + x.max_circuits = MAX_RELAY_CIRCUITS; + x.max_circuits_per_peer = MAX_RELAY_CIRCUITS_PER_PEER; + x.max_reservations = MAX_RELAY_RESERVATIONS; + x.max_reservations_per_peer = MAX_RELAY_RESERVATIONS_PER_PEER; x }), }