# Patch summary (text before the first "diff --git" header is not part of any hunk):
#   * host.go: add transportErrorExitCode (74) and make Run() exit with it when
#     scanner.Err() is non-nil; a clean EOF still exits 0.
#   * host_transport_test.go (new): cover both exit paths via a stubbed host.exit.
#   * package.json: version 0.5.3 -> 0.5.4.
diff --git a/internal/stdioserver/host.go b/internal/stdioserver/host.go
index 6686257d..5e2e97f4 100644
--- a/internal/stdioserver/host.go
+++ b/internal/stdioserver/host.go
@@ -41,6 +41,18 @@ const forceExitAfter = 75 * time.Millisecond
 // retry" — which matches the intent exactly.
 const idleWatchdogExitCode = 75
 
+// transportErrorExitCode is the process exit code used when the inbound stdin
+// scanner fails with a non-EOF error — i.e. the transport (the SSH channel
+// carrying our stdin) was reset or broke mid-stream rather than closing
+// cleanly. It is deliberately non-zero so the client's handleClose reconnects
+// instead of treating the close as an intentional shutdown: a clean EOF means
+// the client deliberately closed the channel (exit 0, no reconnect), whereas a
+// read error means the link died under us and an automatic reconnect is the
+// desired recovery. 74 is EX_IOERR from sysexits.h — "an error occurred while
+// doing I/O on some file" — which names the cause precisely and stays distinct
+// from the watchdog's EX_TEMPFAIL (75) in diagnostics.
+const transportErrorExitCode = 74
+
 // Host owns the stdio NDJSON server lifecycle. One Host per process —
 // stdin / stdout are not multiplexable, and the SIGTERM handler is a
 // process-global side effect.
@@ -187,10 +199,16 @@ func (h *Host) Run() {
 	}
 
 	if err := scanner.Err(); err != nil {
-		// We have no request id to correlate scanner errors with, so
-		// emit a sentinel id and let the client treat it as a
-		// transport-level protocol failure.
+		// A non-EOF scanner error means the transport broke mid-stream (the
+		// SSH channel carrying stdin was reset, not closed cleanly). We have
+		// no request id to correlate it with, so emit a sentinel id as a
+		// best-effort transport-level protocol failure — it may not reach a
+		// client whose link is already gone — then exit non-zero so a still-
+		// present client reconnects rather than treating this as a clean
+		// shutdown. A clean EOF (Err() == nil) falls through to exit 0.
 		_ = h.WriteFrame(proto.ProtocolFailure(proto.ProtocolErrorID, err.Error()))
+		h.drainAndExit(transportErrorExitCode)
+		return
 	}
 	h.drainAndExit(0)
 }
diff --git a/internal/stdioserver/host_transport_test.go b/internal/stdioserver/host_transport_test.go
new file mode 100644
index 00000000..bab718fd
--- /dev/null
+++ b/internal/stdioserver/host_transport_test.go
@@ -0,0 +1,74 @@
+// Package stdioserver — transport-close classification tests.
+//
+// Run() must distinguish a clean stdin EOF (the client deliberately closed the
+// channel → exit 0, no client reconnect) from a non-EOF scanner error (the SSH
+// channel carrying stdin was reset mid-stream → exit non-zero so the client
+// reconnects). Before this distinction existed, every drop — including a
+// transient network reset — exited 0 and the client treated it as an
+// intentional shutdown, leaving the workspace permanently disconnected.
+package stdioserver
+
+import (
+	"errors"
+	"io"
+	"log/slog"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/nexus-code/nexus-code/internal/dispatch"
+)
+
+// errReader returns a non-EOF error on the first Read, simulating an SSH
+// channel reset mid-stream (as opposed to bufio.Scanner's clean-EOF path,
+// which surfaces as Err() == nil).
+type errReader struct{ err error }
+
+func (r errReader) Read(p []byte) (int, error) { return 0, r.err }
+
+func newRunHost(in io.Reader) (*Host, chan int) {
+	logger := slog.New(slog.NewTextHandler(io.Discard, nil))
+	host := New(dispatch.New(), in, io.Discard, logger)
+	exited := make(chan int, 1)
+	host.exit = func(code int) {
+		select {
+		case exited <- code:
+		default:
+		}
+	}
+	return host, exited
+}
+
+// A non-EOF inbound read error must exit transportErrorExitCode so the client's
+// handleClose reconnects instead of treating the dropped link as clean.
+func TestRunExitsNonZeroOnTransportError(t *testing.T) {
+	host, exited := newRunHost(errReader{err: errors.New("read tcp: connection reset by peer")})
+
+	go host.Run()
+
+	select {
+	case code := <-exited:
+		if code != transportErrorExitCode {
+			t.Fatalf("exit code = %d, want %d (transport reset must trigger client reconnect)", code, transportErrorExitCode)
+		}
+	case <-time.After(2 * time.Second):
+		t.Fatal("Run did not exit within 2s of a transport read error")
+	}
+}
+
+// A clean EOF (client deliberately closed stdin) must exit 0 so the client
+// treats it as an intentional shutdown with no reconnect.
+func TestRunExitsZeroOnCleanEOF(t *testing.T) {
+	host, exited := newRunHost(strings.NewReader(""))
+
+	go host.Run()
+
+	select {
+	case code := <-exited:
+		if code != 0 {
+			t.Fatalf("exit code = %d, want 0 (clean EOF must not trigger reconnect)", code)
+		}
+	case <-time.After(2 * time.Second):
+		t.Fatal("Run did not exit within 2s of clean EOF")
+	}
+}
diff --git a/package.json b/package.json
index 69928c7e..16e234d5 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
   "name": "nexus-code",
   "productName": "NexusCode",
-  "version": "0.5.3",
+  "version": "0.5.4",
   "description": "Multi-workspace VSCode-style editor for macOS. Monaco editor + terminal in one window.",
   "license": "MIT",
   "private": true,