From 3347006f576db6e6e72e9a1549a3e5b3311307aa Mon Sep 17 00:00:00 2001 From: Hugo Vacher Date: Wed, 29 Apr 2026 12:16:34 -0400 Subject: [PATCH] Don't crash on client disconnect (Server and Application) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #724. When a Spring client disconnects before the server-side handshake completes — most commonly because Spring.connect_timeout (default 5s) fires on a large app whose preloader takes longer to boot — writes to that client raise Errno::EPIPE. There are crash sites in two processes, with multiple unrescued write call sites in the preloader. Spring::Server#serve -------------------- server.rb calls `client.puts env.version` early in serve. On a disconnected client this raises EPIPE, propagates out of serve, up through the `loop { serve server.accept }` in start_server, and kills the entire server process. The OS socket file lingers, so every subsequent client takes the warm-run path, connects, and times out in verify_server_version until the user runs `spring stop`. Spring::Application#serve ------------------------- Same crash mode in the preloader, with multiple unrescued write sites: * client.puts(0) — main-flow preload-success write * client.puts(1) — inner preload-failure write * print_exception(…) — write to stderr inside the outer rescue Exception handler * streams.each(&:close) — IO#close performs a buffered-write flush, so when print_exception buffers content to a broken stderr, close-time flush re-raises EPIPE * client.puts(1) if pid — exit-status write inside the outer rescue Exception handler Each of these is now wrapped in `begin / rescue Errno::EPIPE / end` so EPIPE during error reporting doesn't itself crash the process. EPIPE in the main flow falls through to the existing `rescue Exception` handler, which already calls `manager.puts unless pid` — important, because without that no-pid newline ApplicationManager#run would block indefinitely on `child.gets.to_i` and deadlock the server's single-threaded accept loop. A conditional log line preserves a friendlier "client disconnected" diagnostic message for EPIPE distinct from the generic "exception:" line used for other failures. Tests ----- test/unit/server_test.rb covers Spring::Server#serve. test/unit/application_test.rb covers Spring::Application#serve at all three EPIPE sites: * cached preload (client.puts(0) before fork) * fresh preload success (client.puts(0) after preload returns) * preload failure (client.puts(1) inside the inner rescue, then EPIPE on print_exception/streams/client.puts(1) inside the outer rescue Exception handler) Each Application#serve test also asserts the manager handshake protocol via a real UNIXSocket.pair: serve must write both the early "got client" newline and a no-pid newline so that ApplicationManager#run does not block on child.gets.to_i. The assertion uses Timeout.timeout(2) + flunk so a hang fails loudly instead of stalling the suite. --- CHANGELOG.md | 4 ++ lib/spring/application.rb | 23 +++++-- lib/spring/server.rb | 2 + test/unit/application_test.rb | 110 ++++++++++++++++++++++++++++++++++ test/unit/server_test.rb | 52 ++++++++++++++++ 5 files changed, 186 insertions(+), 5 deletions(-) create mode 100644 test/unit/application_test.rb create mode 100644 test/unit/server_test.rb diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e67f1a7..9b2e7d49 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## Next Release + +* Fixed crashes when a client disconnects mid-handshake (e.g. on connect timeout). Previously, `Errno::EPIPE` raised in `Spring::Server#serve` or `Spring::Application#serve` would propagate up through the accept loop and kill the process, leaving a stale socket that broke every subsequent client. Both crash sites are now rescued, including writes that happen inside the `rescue Exception` handler in `Application#serve` while reporting an earlier failure to the gone client. + ## 4.4.0 * Revert the removal of UTF-8 force encoding in JSON loading. diff --git a/lib/spring/application.rb b/lib/spring/application.rb index 64853d40..3bb25631 100644 --- a/lib/spring/application.rb +++ b/lib/spring/application.rb @@ -185,7 +185,7 @@ def serve(client) client.puts(0) # preload success rescue Exception log "preload failed" - client.puts(1) # preload failure + ignore_client_disconnect { client.puts(1) } # preload failure raise end end @@ -246,15 +246,19 @@ def serve(client) wait pid, streams, client rescue Exception => e - log "exception: #{e}" + if e.is_a?(Errno::EPIPE) + log "client disconnected (#{e.message}), ignoring command" + else + log "exception: #{e}" + end manager.puts unless pid if streams && !e.is_a?(SystemExit) - print_exception(stderr, e) - streams.each(&:close) + ignore_client_disconnect { print_exception(stderr, e) } + streams.each { |stream| ignore_client_disconnect { stream.close } } end - client.puts(1) if pid + ignore_client_disconnect { client.puts(1) if pid } client.close ensure # Redirect STDOUT and STDERR to prevent from keeping the original FDs @@ -411,6 +415,15 @@ def wait(pid, streams, client) private + # Tolerate Errno::EPIPE on writes to the client socket. Once the client + # disconnects, every subsequent write to it raises — that's expected + # during `serve`'s status reporting; we'd be talking to a process + # that's gone. + def ignore_client_disconnect + yield + rescue Errno::EPIPE + end + def active_record_configured? defined?(ActiveRecord::Base) && ActiveRecord::Base.configurations.any? end diff --git a/lib/spring/server.rb b/lib/spring/server.rb index 6eae1ea8..1c6901a5 100644 --- a/lib/spring/server.rb +++ b/lib/spring/server.rb @@ -74,6 +74,8 @@ def serve(client) end rescue SocketError => e raise e unless client.eof? + rescue Errno::EPIPE => e + log "client disconnected with error #{e.message}, ignoring command" ensure redirect_output end diff --git a/test/unit/application_test.rb b/test/unit/application_test.rb new file mode 100644 index 00000000..c4534046 --- /dev/null +++ b/test/unit/application_test.rb @@ -0,0 +1,110 @@ +require_relative "../helper" +require "spring/env" +require "spring/application" +require "timeout" + +# Regression tests for https://github.com/rails/spring/issues/724: +# disconnected client raises Errno::EPIPE in #serve, which used to crash +# the application process. Each test exercises one of the EPIPE call +# sites in `Spring::Application#serve` with a `UNIXSocket.pair` whose +# client side has been closed. +class ApplicationTest < ActiveSupport::TestCase + def setup + @log_file = File.open(File::NULL, "a") + @spring_env = Spring::Env.new(log_file: @log_file) + @manager_read, @manager_write = UNIXSocket.pair + @app = Spring::Application.new(@manager_write, {}, @spring_env) + @server_sock = build_disconnected_application_client + end + + def teardown + @server_sock&.close + @manager_read&.close + @manager_write&.close + @log_file&.close + end + + test "#serve does not raise when the client disconnects before the cached-preload-success write" do + @app.define_singleton_method(:preloaded?) { true } + + with_saved_stdio { @app.serve(@server_sock) } + + assert_manager_handshake_complete + end + + test "#serve does not raise when the client disconnects before the fresh-preload-success write" do + @app.define_singleton_method(:preload) {} + + with_saved_stdio { @app.serve(@server_sock) } + + assert_manager_handshake_complete + end + + test "#serve does not raise when the client disconnects during preload-failure recovery" do + @app.define_singleton_method(:preload) { raise RuntimeError, "simulated preload failure" } + + with_saved_stdio { @app.serve(@server_sock) } + + assert_manager_handshake_complete + end + + private + + # Simulates a client that handed off its STDOUT/STDERR/STDIN to the + # application and then died. The 3 stream FDs sent to the application: + # + # - STDOUT, STDERR: write-ends of pipes whose read ends are closed, so + # writes (e.g. via `print_exception`) raise Errno::EPIPE. + # - STDIN: read-end of a pipe whose write end is closed, so reads return + # EOF (matches a dead client's STDIN). + # + # The UNIXSocket itself is also closed on the client side, so writes to + # `client` (e.g. `client.puts(0)`) raise Errno::EPIPE too. + def build_disconnected_application_client + server_sock, client_sock = UNIXSocket.pair + + out_r, out_w = IO.pipe + err_r, err_w = IO.pipe + in_r, in_w = IO.pipe + [[out_r, out_w], [err_r, err_w]].each do |r, w| + client_sock.send_io(w) + r.close + w.close + end + client_sock.send_io(in_r) + in_r.close + in_w.close + + client_sock.close + server_sock + end + + # Application#serve reopens STDOUT, STDERR, and STDIN to the streams it + # received from the client (and to its log_file in `reset_streams`). + # Save and restore the test runner's streams around the call. + def with_saved_stdio + saved_out = STDOUT.dup + saved_err = STDERR.dup + saved_in = STDIN.dup + yield + ensure + STDOUT.reopen(saved_out) if saved_out + STDERR.reopen(saved_err) if saved_err + STDIN.reopen(saved_in) if saved_in + [saved_out, saved_err, saved_in].compact.each { |io| io.close rescue nil } + end + + # `serve` writes two newlines to the manager: an early "got client" ack + # and a no-pid response from the rescue handler. Without the second, + # `ApplicationManager#run` blocks forever on `child.gets.to_i` and the + # server's single-threaded accept loop deadlocks. Timeout + flunk fails + # loudly instead of stalling the suite. + def assert_manager_handshake_complete + acks = Timeout.timeout(2) { [@manager_read.gets, @manager_read.gets] } + assert_equal ["\n", "\n"], acks + rescue Timeout::Error + flunk "Application#serve did not send the no-pid response to the manager " \ + "— ApplicationManager#run will block on child.gets.to_i and deadlock " \ + "the server's accept loop" + end +end diff --git a/test/unit/server_test.rb b/test/unit/server_test.rb new file mode 100644 index 00000000..40d9c4d3 --- /dev/null +++ b/test/unit/server_test.rb @@ -0,0 +1,52 @@ +require_relative "../helper" +require "spring/env" +require "spring/server" +require "fileutils" +require "tmpdir" + +# Regression test for https://github.com/rails/spring/issues/724: +# disconnected client raises Errno::EPIPE in #serve, which used to crash +# the accept loop and require `spring stop` to recover. +class ServerTest < ActiveSupport::TestCase + def setup + @tmpdir = Dir.mktmpdir("spring-server-test") + @log_file = File.open(File::NULL, "a") + @env = Spring::Env.new(log_file: @log_file) + # Pin the env's pidfile path to the test's tmpdir so we don't pollute + # the user's spring tmp directory. + pidfile_path = Pathname.new(File.join(@tmpdir, "spring.pid")) + @env.define_singleton_method(:pidfile_path) { pidfile_path } + end + + def teardown + @log_file&.close + FileUtils.remove_entry(@tmpdir) if @tmpdir + end + + test "#serve does not raise when the client disconnects before the version banner is sent" do + server = Spring::Server.new(env: @env) + server_sock, client_sock = UNIXSocket.pair + client_sock.close + + assert_nothing_raised do + with_saved_stdio { server.serve(server_sock) } + end + ensure + server_sock&.close + end + + private + + # Server#serve calls `redirect_output` in `ensure`, which reopens + # STDOUT/STDERR to env.log_file. Save and restore the test runner's + # streams around the call so subsequent tests still log normally. + def with_saved_stdio + saved_out = STDOUT.dup + saved_err = STDERR.dup + yield + ensure + STDOUT.reopen(saved_out) if saved_out + STDERR.reopen(saved_err) if saved_err + [saved_out, saved_err].compact.each { |io| io.close rescue nil } + end +end