From d2fc42d65f33558342a8947c6e8229e481035b9f Mon Sep 17 00:00:00 2001 From: guessi Date: Wed, 17 Jun 2026 19:42:05 +0800 Subject: [PATCH] fix: replace efs-proxy bind panic with graceful error exit efs-proxy panicked with an unhelpful backtrace when it failed to bind to 127.0.0.1:<port>. This manifested as a hard crash with no actionable error message in environments like Bottlerocket/EKS containers where the loopback bind can fail (e.g. permission denied, address in use). Change Controller::new to return Result and have main() exit with code 1 and a clear error message instead of panicking. The mount helper already detects a non-zero efs-proxy exit and surfaces it to the user via mount.log. Fixes #323 --- src/proxy/src/controller.rs | 56 ++++++++++++++++++++++++++----------- src/proxy/src/main.rs | 18 ++++++++++-- 2 files changed, 55 insertions(+), 19 deletions(-) diff --git a/src/proxy/src/controller.rs b/src/proxy/src/controller.rs index cce95db8..ca0a1175 100644 --- a/src/proxy/src/controller.rs +++ b/src/proxy/src/controller.rs @@ -110,12 +110,17 @@ impl Controller { partition_finder: Arc + Sync + Send + 'static>, status_reporter: StatusReporter, cw_publisher: Option>, - ) -> Self { - let Ok(listener) = TcpListener::bind(listen_addr).await else { - panic!("Failed to bind {}", listen_addr); - }; + ) -> Result { + let listener = TcpListener::bind(listen_addr).await.map_err(|e| { + error!( + "Failed to bind to {}: {}. 
\ + Ensure the address is reachable and the port is not already in use.", + listen_addr, e + ); + e + })?; - Self { + Ok(Self { listener, partition_finder, proxy_id: ProxyIdentifier::new(), @@ -125,7 +130,7 @@ impl Controller { status_reporter, proxy_config, cw_publisher, - } + }) } pub async fn run( @@ -218,7 +223,12 @@ impl Controller { } // Skip channel init if read bypass is not requested - let channel_init_config = if !self.proxy_config.nested_config.read_bypass_config.requested { + let channel_init_config = if !self + .proxy_config + .nested_config + .read_bypass_config + .requested + { ChannelInitConfig::default() } else { let configs = vec![ChannelConfigArgs::AWSFILE_READ_BYPASS_V2( @@ -503,10 +513,8 @@ impl Controller { mod tests { use super::*; use crate::{ - aws::cw_publisher::LogLevel, - config::channel_init_config::ChannelInitConfig, - proxy_builder::ProxyBuilder, - status_reporter::create_status_channel, + aws::cw_publisher::LogLevel, config::channel_init_config::ChannelInitConfig, + proxy_builder::ProxyBuilder, status_reporter::create_status_channel, }; use std::sync::atomic::AtomicU64; use tokio::net::TcpStream; @@ -599,12 +607,7 @@ mod tests { ) .await; - let mut state = IncarnationState::new( - ProxyIdentifier::new(), - None, - events_tx, - 1, - ); + let mut state = IncarnationState::new(ProxyIdentifier::new(), None, events_tx, 1); let period_secs = 10; let publisher_clone = mock_publisher.clone(); @@ -658,4 +661,23 @@ mod tests { let proxy = handle.await.unwrap(); let _ = proxy.shutdown().await; } + + #[tokio::test] + async fn test_controller_new_bind_failure_returns_err() { + // Occupy a port so the second bind attempt fails. 
+ let occupied = TcpListener::bind("127.0.0.1:0").await.unwrap(); + let addr = occupied.local_addr().unwrap(); + + let (_status_requester, status_reporter) = create_status_channel(); + let result = Controller::new( + &addr.to_string(), + ProxyConfig::default(), + Arc::new(MockPartitionFinder), + status_reporter, + None, + ) + .await; + + assert!(result.is_err(), "expected Err when port is already bound"); + } } diff --git a/src/proxy/src/main.rs b/src/proxy/src/main.rs index d1432c6a..c18f1f40 100644 --- a/src/proxy/src/main.rs +++ b/src/proxy/src/main.rs @@ -100,7 +100,14 @@ async fn main() { status_reporter, cw_publisher.clone(), ) - .await; + .await + .unwrap_or_else(|_| { + let p = std::path::Path::new(&proxy_config.pid_file_path); + if p.exists() { + let _ = std::fs::remove_file(p); + } + std::process::exit(1); + }); tokio::spawn(controller.run( sigterm_cancellation_token.clone(), AwsFileRpcClient, @@ -116,7 +123,14 @@ async fn main() { status_reporter, cw_publisher.clone(), ) - .await; + .await + .unwrap_or_else(|_| { + let p = std::path::Path::new(&proxy_config.pid_file_path); + if p.exists() { + let _ = std::fs::remove_file(p); + } + std::process::exit(1); + }); tokio::spawn(controller.run( sigterm_cancellation_token.clone(), AwsFileRpcClient,