From abc8b4b56d60800f1c54cfbb355f2b4d159d35a4 Mon Sep 17 00:00:00 2001 From: Jason Schulz Date: Wed, 10 Jun 2026 17:05:00 -0400 Subject: [PATCH] Fix multi-wire recv prefill posting receives past the wire's region RingImpl::recv's prefill compared the relative chunk count (N * buff) against the absolute end offset of the wire's region (limits[lw]). For every wire after the first, limits[lw] is large even when the wire's own region holds fewer than PIPELINE chunks (or zero, for messages smaller than lw * bytes_per_wire), so the prefill posted receives that no matching send would ever fill. in_flight then never drained and recv spun forever: point-to-point recvs of small messages deadlocked whenever a rank had more than one connection per direction. Compare against the region size (limits[lw] - write_offset[lw]) instead, mirroring the send-side prefill which advances the absolute read_offset[lw] toward limits[lw]. Co-Authored-By: Claude Fable 5 --- mlx/distributed/jaccl/lib/jaccl/ring_impl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlx/distributed/jaccl/lib/jaccl/ring_impl.h b/mlx/distributed/jaccl/lib/jaccl/ring_impl.h index 3ee91b3762..766f55386d 100644 --- a/mlx/distributed/jaccl/lib/jaccl/ring_impl.h +++ b/mlx/distributed/jaccl/lib/jaccl/ring_impl.h @@ -496,7 +496,7 @@ class RingImpl { // Prefill the pipeline for (int lw = 0; lw < n_wires; lw++) { int buff = 0; - while (N * buff < limits[lw] && buff < PIPELINE) { + while (N * buff < limits[lw] - write_offset[lw] && buff < PIPELINE) { recv_from(sz, buff, dir, lw); buff++;