From b5eba795fa9ddaf12f997dd6439a474942613ddc Mon Sep 17 00:00:00 2001
From: Min Huang <70873102+min0625@users.noreply.github.com>
Date: Thu, 2 Jul 2026 06:28:29 +0000
Subject: [PATCH] fix: distinct SIGINT/SIGTERM exit codes, surface mid-stream
 API errors, require model name with base URL

- Map SIGINT/SIGTERM to conventional 128+N exit codes (130/143) instead of always 130
- Surface mid-stream error events/objects from all three providers instead of silently returning empty/partial output
- Detect Anthropic max_tokens truncation and return an error instead of printing incomplete output with exit 0
- Require MINT_MODEL_NAME when MINT_BASE_URL is set, since a custom endpoint has no meaningful default model
- Add LC_MESSAGES to the locale fallback chain (LC_ALL > LC_MESSAGES > LANG)
- Strip leading "v" from local build version so it matches goreleaser's version string
- Check out the release tag (not main) in publish-npm/publish-pypi workflows
- Handle unset $SHELL and killed-by-signal exit status in install.sh / npm wrapper script
---
 .github/workflows/publish-npm.yml             |   5 +
 .github/workflows/publish-pypi.yml            |   7 +-
 AGENTS.md                                     |   4 +-
 Makefile                                      |   4 +-
 README.ja.md                                  |   2 +-
 README.md                                     |   2 +-
 README.zh-TW.md                               |   2 +-
 cmd/mint/main.go                              |  49 ++++++--
 cmd/mint/main_signal_test.go                  |  95 ++++++++++++++
 cmd/mint/main_test.go                         | 116 ++++--------------
 docs/manual-testing.md                        |  13 +-
 internal/provider/anthropic/anthropic.go      |  40 +++++-
 internal/provider/anthropic/anthropic_test.go |  65 ++++++++++
 internal/provider/config.go                   |   7 ++
 internal/provider/config_test.go              |  12 +-
 internal/provider/googlegenai/google_genai.go |  47 ++++++-
 .../provider/googlegenai/google_genai_test.go |  62 ++++++++++
 internal/provider/openai/openai.go            |  39 +++++-
 internal/provider/openai/openai_test.go       |  68 ++++++++++
 npm/mint-ai/scripts/mint                      |   6 +
 script/install.sh                             |   7 +-
 21 files changed, 523 insertions(+), 129 deletions(-)
 create mode 100644 cmd/mint/main_signal_test.go

diff --git a/.github/workflows/publish-npm.yml b/.github/workflows/publish-npm.yml
index 40a3561..e4715a8 100644
--- a/.github/workflows/publish-npm.yml
+++ b/.github/workflows/publish-npm.yml
@@ -16,6 +16,11 @@ jobs:
 
     steps:
       - uses: actions/checkout@v6
+        with:
+          # Check out the released tag (workflow_run head_branch is the tag
+          # name), not main HEAD, so the packaging scripts and README match
+          # the release being published.
+          ref: ${{ github.event.workflow_run.head_branch }}
 
       - uses: actions/setup-node@v4
         with:
diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml
index 30f4a40..77c2bfd 100644
--- a/.github/workflows/publish-pypi.yml
+++ b/.github/workflows/publish-pypi.yml
@@ -17,6 +17,11 @@ jobs:
 
     steps:
       - uses: actions/checkout@v6
+        with:
+          # Check out the released tag (workflow_run head_branch is the tag
+          # name), not main HEAD, so the packaging scripts and README match
+          # the release being published.
+          ref: ${{ github.event.workflow_run.head_branch }}
 
       - uses: actions/setup-python@v5
         with:
@@ -58,4 +63,4 @@ jobs:
         uses: pypa/gh-action-pypi-publish@release/v1
         with:
           packages-dir: dist/wheels/
-          skip_existing: true
+          skip-existing: true
diff --git a/AGENTS.md b/AGENTS.md
index ef24d33..3f2c8f7 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -44,6 +44,8 @@ binaries → `publish-pypi.yml` assembles wheels and uploads to PyPI.
 ```
 cmd/mint/main.go                     # entry point; cobra root command; viper config wiring
 internal/llm/llm.go                  # Completer interface for LLM backends
+internal/llm/writer.go               # TrailingNewlineWriter shared by provider backends
+internal/httpx/httpx.go              # shared tuned *http.Client used by every provider
 internal/provider/
   config.go                          # Config struct; provider validation
   provider.go                        # NewCompleter factory function
@@ -96,6 +98,6 @@ bin/mint                             # compiled binary (gitignored)
 - Source language: optional `--source` / `-s` flag (BCP-47 tag); flag-only, no env var (a source is per-input, not a persistent preference). When set it skips detection and anchors the rewrite prompt to translate *from* that language, so cross-language homographs (e.g. French `chat` → English `cat`) and romanized input (e.g. `konnichiwa` → `hello`) are translated rather than treated as already-target text. Empty (the default) preserves the original auto-detect behavior. Pure language-neutral input still passes through unchanged regardless.
 - Language-neutral pass-through: if detected language is `neutral`, input is printed unchanged with no translation call.
 - Same-language behavior: if detected input language matches the target language, the tool performs grammar and spelling correction instead of translation.
-- Target language priority: `--target` flag → `MINT_TARGET_LANG` env var → system locale (`$LC_ALL` / `$LANG`) → `en`.
+- Target language priority: `--target` flag → `MINT_TARGET_LANG` env var → system locale (`$LC_ALL` / `$LC_MESSAGES` / `$LANG`) → `en`.
 - Language rotation: `MINT_TARGET_LANG` accepts a comma-separated list (e.g., `en,zh-TW,ja`); when the detected input language matches a tag in the list, the tool translates to the next tag (wraps around). BCP-47 variants sharing the same primary subtag (e.g., `zh-HK` and `zh-TW`) are treated as equivalent.
 - Input from positional arg or stdin (auto-detected).
diff --git a/Makefile b/Makefile
index e9396d7..ef7313c 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,6 @@
-VERSION ?= $(shell git describe --tags --exact-match 2>/dev/null || git rev-parse --short HEAD)
+# Leading "v" is stripped to match goreleaser's {{.Version}}, so `mint --version`
+# prints the same string for local builds and released binaries.
+VERSION ?= $(shell (git describe --tags --exact-match 2>/dev/null || git rev-parse --short HEAD) | sed 's/^v//')
 COMMIT ?= $(shell git rev-parse HEAD)
 LDFLAGS ?= -s -w -X main.version=$(VERSION) -X main.commit=$(COMMIT)
 NEW_FROM_REV ?= HEAD
diff --git a/README.ja.md b/README.ja.md
index b73033d..132744e 100644
--- a/README.ja.md
+++ b/README.ja.md
@@ -203,7 +203,7 @@ mint "こんにちは"   # ja → en: Hello
 | `MINT_PROVIDER` | `google-genai` \| `openai` \| `anthropic` | — (必須) |
 | `MINT_API_KEY` | APIキー。デフォルトのエンドポイント使用時は必須。`MINT_BASE_URL`設定時は任意（プロキシ側で認証処理する場合） | — |
 | `MINT_BASE_URL` | カスタムAPIベースURL（ドメインのみ指定、パスは各プロバイダーが自動付与）。`openai`と組み合わせることで、Ollama（`http://localhost:11434`）、LM Studio（`http://localhost:1234`）、またはOpenAI互換エンドポイントを指定可能 | プロバイダーのデフォルト |
-| `MINT_MODEL_NAME` | 使用するモデル名 | `gemini-3.1-flash-lite` / `gpt-4o-mini` / `claude-haiku-4-5` |
+| `MINT_MODEL_NAME` | 使用するモデル名（`MINT_BASE_URL` 設定時は必須） | `gemini-3.1-flash-lite` / `gpt-4o-mini` / `claude-haiku-4-5` |
 | `MINT_TARGET_LANG` | ターゲット言語（例: `en` または `en,zh-TW,ja`） | システムのロケール設定 |
 | `MINT_VERBOSE` | `true`に設定すると詳細な診断出力が有効になります（`--verbose`相当） | `false` |
 
diff --git a/README.md b/README.md
index 4ab0ee8..43658bb 100644
--- a/README.md
+++ b/README.md
@@ -203,7 +203,7 @@ mint "こんにちは"   # ja → en: Hello
 | `MINT_PROVIDER` | `google-genai` \| `openai` \| `anthropic` | — (required) |
 | `MINT_API_KEY` | API key; required when using the default endpoint; optional when `MINT_BASE_URL` is set (proxy handles auth) | — |
 | `MINT_BASE_URL` | Custom API base URL (domain only; each provider appends its own path); use with `openai` to target Ollama (`http://localhost:11434`), LM Studio (`http://localhost:1234`), or any other OpenAI-compatible endpoint | Provider default |
-| `MINT_MODEL_NAME` | Model to use | `gemini-3.1-flash-lite` / `gpt-4o-mini` / `claude-haiku-4-5` |
+| `MINT_MODEL_NAME` | Model to use; required when `MINT_BASE_URL` is set | `gemini-3.1-flash-lite` / `gpt-4o-mini` / `claude-haiku-4-5` |
 | `MINT_TARGET_LANG` | Target language(s), e.g. `en` or `en,zh-TW,ja` | System locale |
 | `MINT_VERBOSE` | Set to `true` to enable verbose diagnostic output (equivalent to `--verbose`) | `false` |
 
diff --git a/README.zh-TW.md b/README.zh-TW.md
index b93ba2d..136c69e 100644
--- a/README.zh-TW.md
+++ b/README.zh-TW.md
@@ -203,7 +203,7 @@ mint "こんにちは"   # ja → en: Hello
 | `MINT_PROVIDER` | `google-genai` \| `openai` \| `anthropic` | — (必填) |
 | `MINT_API_KEY` | API 金鑰；使用預設 endpoint 時必填；設定 `MINT_BASE_URL` 時選填（由代理處理認證） | — |
 | `MINT_BASE_URL` | 自訂 API base URL（僅填 domain，各提供商自行附加路徑）；搭配 `openai` 可指向 Ollama（`http://localhost:11434`）、LM Studio（`http://localhost:1234`）或任何 OpenAI 相容端點 | 提供商預設 |
-| `MINT_MODEL_NAME` | 使用的模型 | `gemini-3.1-flash-lite` / `gpt-4o-mini` / `claude-haiku-4-5` |
+| `MINT_MODEL_NAME` | 使用的模型；設定 `MINT_BASE_URL` 時必填 | `gemini-3.1-flash-lite` / `gpt-4o-mini` / `claude-haiku-4-5` |
 | `MINT_TARGET_LANG` | 目標語言，例如 `en` 或 `en,zh-TW,ja` | 系統區域設定 |
 | `MINT_VERBOSE` | 設為 `true` 可啟用詳細診斷輸出（等同於 `--verbose`） | `false` |
 
diff --git a/cmd/mint/main.go b/cmd/mint/main.go
index 67eb812..9bf866f 100644
--- a/cmd/mint/main.go
+++ b/cmd/mint/main.go
@@ -38,24 +38,56 @@ func main() {
 	os.Exit(run())
 }
 
+// signalError is the cancel cause recorded when a termination signal arrives;
+// it carries the signal so run() can map it to the conventional exit code.
+type signalError struct{ sig os.Signal }
+
+func (e *signalError) Error() string { return "interrupted by " + e.sig.String() }
+
+// exitCode maps the signal to the shell convention 128+N: 130 for SIGINT,
+// 143 for SIGTERM.
+func (e *signalError) exitCode() int {
+	if s, ok := e.sig.(syscall.Signal); ok {
+		return 128 + int(s)
+	}
+
+	return 130
+}
+
 func run() int {
 	// No request timeout: the CLI waits as long as the backend needs (handy for
 	// slow local models). Ctrl+C / SIGTERM cancels the in-flight request.
-	ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
-	defer stop()
+	//
+	// signal.NotifyContext is not used because it hides which signal fired,
+	// and SIGINT (130) and SIGTERM (143) must map to different exit codes; the
+	// cancel cause carries the signal instead.
+	ctx, cancel := context.WithCancelCause(context.Background())
+	defer cancel(nil)
+
+	sigCh := make(chan os.Signal, 1)
+
+	signal.Notify(sigCh, os.Interrupt, syscall.SIGTERM)
+	defer signal.Stop(sigCh)
+
+	go func() {
+		cancel(&signalError{sig: <-sigCh})
+	}()
 
 	if err := newRootCmd().ExecuteContext(ctx); err != nil {
 		// Compare against context.Cause(ctx), not context.Canceled: net/http
-		// surfaces context.Cause(ctx), which signal.NotifyContext sets to a
-		// private signalError rather than context.Canceled. Checking errors.Is
+		// surfaces context.Cause(ctx), which the signal goroutine above sets
+		// to a *signalErr rather than context.Canceled. Checking errors.Is
 		// against the actual cause (instead of just ctx.Err() != nil) also
 		// makes sure an unrelated error isn't misreported as a clean interrupt
 		// merely because a signal happened to arrive around the same time.
 		// (context.Cause(ctx) is nil until ctx is done, and errors.Is(err, nil)
 		// is always false for a non-nil err, so no separate nil check is needed.)
-		if errors.Is(err, context.Cause(ctx)) {
+		cause := context.Cause(ctx)
+
+		var sigErr *signalError
+		if errors.Is(err, cause) && errors.As(cause, &sigErr) {
 			// Interrupted by the user — exit quietly with the conventional code.
-			return 130
+			return sigErr.exitCode()
 		}
 
 		fmt.Fprintln(os.Stderr, "Error:", err)
@@ -505,9 +537,10 @@ func buildDetectPrompt(text string) (system, user, nonce string) {
 	return system, user, nonce
 }
 
-// getSystemLanguage gets the system language from the OS locale.
+// getSystemLanguage gets the system language from the OS locale, following the
+// POSIX priority order: LC_ALL > LC_MESSAGES > LANG.
 func getSystemLanguage() string {
-	for _, env := range []string{"LC_ALL", "LANG"} {
+	for _, env := range []string{"LC_ALL", "LC_MESSAGES", "LANG"} {
 		if lang := os.Getenv(env); lang != "" {
 			// Strip encoding suffix before cutting on "_":
 			// "C.UTF-8" → "C"; "en_US.UTF-8" → "en_US" (no change here)
diff --git a/cmd/mint/main_signal_test.go b/cmd/mint/main_signal_test.go
new file mode 100644
index 0000000..8044218
--- /dev/null
+++ b/cmd/mint/main_signal_test.go
@@ -0,0 +1,95 @@
+// Copyright 2026 The Mint Authors.
+
+// Signal-delivery tests live behind a unix build tag: they send real POSIX
+// signals to the test process via syscall.Kill, which does not exist on
+// Windows (and neither does meaningful SIGINT/SIGTERM delivery there).
+
+//go:build unix
+
+package main
+
+import (
+	"net/http"
+	"net/http/httptest"
+	"os"
+	"syscall"
+	"testing"
+	"time"
+)
+
+// TestRunInterrupted covers both signals run() registers (os.Interrupt and
+// syscall.SIGTERM): either one must cancel an in-flight request and exit
+// quietly with the conventional 128+N code (130 for SIGINT, 143 for SIGTERM).
+func TestRunInterrupted(t *testing.T) {
+	signals := []struct {
+		name     string
+		sig      syscall.Signal
+		wantCode int
+	}{
+		{"SIGINT", syscall.SIGINT, 130},
+		{"SIGTERM", syscall.SIGTERM, 143},
+	}
+
+	for _, tt := range signals {
+		t.Run(tt.name, func(t *testing.T) {
+			started := make(chan struct{})
+			done := make(chan struct{})
+
+			srv := httptest.NewServer(http.HandlerFunc(func(_ http.ResponseWriter, _ *http.Request) {
+				close(started)
+				<-done // held open until the test releases it, regardless of client-side cancellation
+			}))
+
+			defer func() {
+				close(done)
+				srv.Close()
+			}()
+
+			t.Setenv("MINT_PROVIDER", "openai")
+			t.Setenv("MINT_API_KEY", "test")
+			t.Setenv("MINT_BASE_URL", srv.URL)
+			t.Setenv("MINT_MODEL_NAME", "test-model")
+
+			old := os.Args
+
+			os.Args = []string{"mint", "--target", "en", "hello"}
+			defer func() { os.Args = old }()
+
+			flushOut := captureStdout(t)
+			flushErr := captureStderr(t)
+
+			codeCh := make(chan int, 1)
+
+			go func() { codeCh <- run() }()
+
+			select {
+			case <-started:
+			case <-time.After(5 * time.Second):
+				t.Fatal("request never reached the server")
+			}
+
+			if err := syscall.Kill(os.Getpid(), tt.sig); err != nil {
+				t.Fatalf("failed to send %s: %v", tt.name, err)
+			}
+
+			var code int
+
+			select {
+			case code = <-codeCh:
+			case <-time.After(5 * time.Second):
+				t.Fatalf("run() did not return after %s", tt.name)
+			}
+
+			stderrOutput := flushErr()
+			_ = flushOut()
+
+			if code != tt.wantCode {
+				t.Errorf("expected exit code %d, got %d", tt.wantCode, code)
+			}
+
+			if stderrOutput != "" {
+				t.Errorf("expected no stderr output on interrupt, got: %q", stderrOutput)
+			}
+		})
+	}
+}
diff --git a/cmd/mint/main_test.go b/cmd/mint/main_test.go
index 115c42b..acc49d6 100644
--- a/cmd/mint/main_test.go
+++ b/cmd/mint/main_test.go
@@ -11,9 +11,7 @@ import (
 	"os"
 	"slices"
 	"strings"
-	"syscall"
 	"testing"
-	"time"
 
 	"github.com/min0625/mint/internal/llm"
 )
@@ -111,6 +109,7 @@ func TestResolveTargetLangs(t *testing.T) {
 
 func TestResolveTargetLangsSystemFallback(t *testing.T) {
 	t.Setenv("LANG", "ja_JP.UTF-8")
+	t.Setenv("LC_MESSAGES", "")
 	t.Setenv("LC_ALL", "")
 
 	got := resolveTargetLangs("", "")
@@ -121,6 +120,7 @@ func TestResolveTargetLangsSystemFallback(t *testing.T) {
 
 func TestResolveTargetLangsDefaultEn(t *testing.T) {
 	t.Setenv("LANG", "")
+	t.Setenv("LC_MESSAGES", "")
 	t.Setenv("LC_ALL", "")
 
 	got := resolveTargetLangs("", "")
@@ -278,26 +278,32 @@ func TestDetectLanguageFiltersEchoedNonce(t *testing.T) {
 
 func TestGetSystemLanguage(t *testing.T) {
 	tests := []struct {
-		name  string
-		lang  string
-		lcAll string
-		want  string
+		name       string
+		lang       string
+		lcMessages string
+		lcAll      string
+		want       string
 	}{
-		{"typical LANG", "en_US.UTF-8", "", "en"},
-		{"zh locale", "zh_TW.UTF-8", "", "zh"},
-		{"lang without region", "en", "", "en"},
-		{"C locale skipped, uses LC_ALL", "C", "fr_FR.UTF-8", "fr"},
-		{"C.UTF-8 locale skipped, uses LC_ALL", "C.UTF-8", "fr_FR.UTF-8", "fr"},
-		{"POSIX locale skipped, uses LC_ALL", posixLocale, "de_DE.UTF-8", "de"},
-		{"LC_ALL used when LANG empty", "", "ja_JP.UTF-8", "ja"},
-		{"LC_ALL overrides LANG when both set", "en_US.UTF-8", "ja_JP.UTF-8", "ja"},
-		{"both empty returns empty string", "", "", ""},
-		{"LC_ALL is C, falls through to LANG", "de_DE.UTF-8", "C", "de"},
-		{"LC_ALL is POSIX, falls through to LANG", "ko_KR.UTF-8", posixLocale, "ko"},
+		{"typical LANG", "en_US.UTF-8", "", "", "en"},
+		{"zh locale", "zh_TW.UTF-8", "", "", "zh"},
+		{"lang without region", "en", "", "", "en"},
+		{"C locale skipped, uses LC_ALL", "C", "", "fr_FR.UTF-8", "fr"},
+		{"C.UTF-8 locale skipped, uses LC_ALL", "C.UTF-8", "", "fr_FR.UTF-8", "fr"},
+		{"POSIX locale skipped, uses LC_ALL", posixLocale, "", "de_DE.UTF-8", "de"},
+		{"LC_ALL used when LANG empty", "", "", "ja_JP.UTF-8", "ja"},
+		{"LC_ALL overrides LANG when both set", "en_US.UTF-8", "", "ja_JP.UTF-8", "ja"},
+		{"all empty returns empty string", "", "", "", ""},
+		{"LC_ALL is C, falls through to LANG", "de_DE.UTF-8", "", "C", "de"},
+		{"LC_ALL is POSIX, falls through to LANG", "ko_KR.UTF-8", "", posixLocale, "ko"},
+		{"LC_MESSAGES used when LC_ALL empty", "", "fr_FR.UTF-8", "", "fr"},
+		{"LC_ALL overrides LC_MESSAGES", "", "fr_FR.UTF-8", "ja_JP.UTF-8", "ja"},
+		{"LC_MESSAGES overrides LANG", "en_US.UTF-8", "fr_FR.UTF-8", "", "fr"},
+		{"LC_MESSAGES is C, falls through to LANG", "de_DE.UTF-8", "C", "", "de"},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			t.Setenv("LANG", tt.lang)
+			t.Setenv("LC_MESSAGES", tt.lcMessages)
 			t.Setenv("LC_ALL", tt.lcAll)
 
 			if got := getSystemLanguage(); got != tt.want {
@@ -687,82 +693,6 @@ func TestRunError(t *testing.T) {
 	_ = flushErr()
 }
 
-// TestRunInterrupted covers both signals run() registers via
-// signal.NotifyContext (os.Interrupt and syscall.SIGTERM): either one must
-// cancel an in-flight request and exit quietly with code 130.
-func TestRunInterrupted(t *testing.T) {
-	signals := []struct {
-		name string
-		sig  syscall.Signal
-	}{
-		{"SIGINT", syscall.SIGINT},
-		{"SIGTERM", syscall.SIGTERM},
-	}
-
-	for _, tt := range signals {
-		t.Run(tt.name, func(t *testing.T) {
-			started := make(chan struct{})
-			done := make(chan struct{})
-
-			srv := httptest.NewServer(http.HandlerFunc(func(_ http.ResponseWriter, _ *http.Request) {
-				close(started)
-				<-done // held open until the test releases it, regardless of client-side cancellation
-			}))
-
-			defer func() {
-				close(done)
-				srv.Close()
-			}()
-
-			t.Setenv("MINT_PROVIDER", "openai")
-			t.Setenv("MINT_API_KEY", "test")
-			t.Setenv("MINT_BASE_URL", srv.URL)
-			t.Setenv("MINT_MODEL_NAME", "test-model")
-
-			old := os.Args
-
-			os.Args = []string{"mint", "--target", "en", "hello"}
-			defer func() { os.Args = old }()
-
-			flushOut := captureStdout(t)
-			flushErr := captureStderr(t)
-
-			codeCh := make(chan int, 1)
-
-			go func() { codeCh <- run() }()
-
-			select {
-			case <-started:
-			case <-time.After(5 * time.Second):
-				t.Fatal("request never reached the server")
-			}
-
-			if err := syscall.Kill(os.Getpid(), tt.sig); err != nil {
-				t.Fatalf("failed to send %s: %v", tt.name, err)
-			}
-
-			var code int
-
-			select {
-			case code = <-codeCh:
-			case <-time.After(5 * time.Second):
-				t.Fatalf("run() did not return after %s", tt.name)
-			}
-
-			stderrOutput := flushErr()
-			_ = flushOut()
-
-			if code != 130 {
-				t.Errorf("expected exit code 130, got %d", code)
-			}
-
-			if stderrOutput != "" {
-				t.Errorf("expected no stderr output on interrupt, got: %q", stderrOutput)
-			}
-		})
-	}
-}
-
 func TestResolveSourceLang(t *testing.T) {
 	tests := []struct {
 		name string
diff --git a/docs/manual-testing.md b/docs/manual-testing.md
index 491d92e..cd16146 100644
--- a/docs/manual-testing.md
+++ b/docs/manual-testing.md
@@ -231,17 +231,22 @@ MINT_PROVIDER=invalid mint -t zh-TW "apple"
 unset MINT_API_KEY
 mint -t zh-TW "apple"      # Error: MINT_API_KEY is required for provider: <provider>
 
-# MINT_BASE_URL set: API key is optional (proxy handles auth)
+# MINT_BASE_URL set: API key is optional (proxy handles auth), but a model
+# name is required — a custom endpoint has no meaningful default model.
 export MINT_PROVIDER=openai
 export MINT_BASE_URL=http://localhost:11434
 export MINT_MODEL_NAME=translategemma:4b
 unset MINT_API_KEY
 mint -t zh-TW "hello"      # 你好  (no API key required)
+
+unset MINT_MODEL_NAME
+mint -t zh-TW "hello"      # Error: MINT_MODEL_NAME is required when MINT_BASE_URL is set
 ```
 
-Ctrl+C / SIGTERM cancels any in-flight request and exits with code 130.
-This applies when the signal arrives while an HTTP request is in progress;
-a signal sent before the request starts is handled by the OS default (exit 143 for SIGTERM).
+Ctrl+C sends SIGINT and exits with code 130; SIGTERM exits with code 143 — the
+conventional 128+N mapping, applied while an HTTP request is in progress so the two
+signals stay distinguishable. A signal sent before the request starts is handled by
+the OS default (exit 143 for SIGTERM, exit 130 for SIGINT).
 
 ## 14. `MINT_VERBOSE` environment variable
 
diff --git a/internal/provider/anthropic/anthropic.go b/internal/provider/anthropic/anthropic.go
index 251efbc..ccfc714 100644
--- a/internal/provider/anthropic/anthropic.go
+++ b/internal/provider/anthropic/anthropic.go
@@ -22,7 +22,7 @@ const (
 	defaultAPIPath   = "/v1/messages"
 	defaultModelName = "claude-haiku-4-5"
 	anthropicVersion = "2023-06-01"
-	maxTokens        = 8192
+	maxTokens        = 64000
 	temperature      = 0.3
 	// maxScanLineBytes raises bufio.Scanner's default 64KB line limit so a
 	// large SSE data line or error body does not abort the stream early.
@@ -70,8 +70,14 @@ type message struct {
 }
 
 type streamDelta struct {
-	Type string `json:"type"`
-	Text string `json:"text"`
+	Type       string `json:"type"`
+	Text       string `json:"text"`
+	StopReason string `json:"stop_reason"`
+}
+
+type streamError struct {
+	Type    string `json:"type"`
+	Message string `json:"message"`
 }
 
 type streamUsage struct {
@@ -88,6 +94,7 @@ type streamEvent struct {
 	Delta   streamDelta   `json:"delta"`
 	Message streamMessage `json:"message"`
 	Usage   streamUsage   `json:"usage"`
+	Error   *streamError  `json:"error"`
 }
 
 // Complete calls the Anthropic API with streaming and writes tokens to w as they arrive.
@@ -130,7 +137,10 @@ func (c *Client) Complete(ctx context.Context, system, user string, w io.Writer)
 	scanner := bufio.NewScanner(resp.Body)
 	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), maxScanLineBytes)
 
-	var usage llm.Usage
+	var (
+		usage     llm.Usage
+		truncated bool
+	)
 
 	out := llm.NewTrailingNewlineWriter(w)
 
@@ -148,10 +158,20 @@ func (c *Client) Complete(ctx context.Context, system, user string, w io.Writer)
 		}
 
 		switch event.Type {
+		case "error":
+			// Mid-stream error event (e.g. overloaded_error): the HTTP status
+			// was already 200, so this is the only failure signal we get.
+			if event.Error != nil {
+				return llm.Usage{}, fmt.Errorf("API stream error: %s: %s", event.Error.Type, event.Error.Message)
+			}
 		case "message_start":
 			usage.InputTokens = event.Message.Usage.InputTokens
 		case "message_delta":
 			usage.OutputTokens = event.Usage.OutputTokens
+
+			if event.Delta.StopReason == "max_tokens" {
+				truncated = true
+			}
 		case "content_block_delta":
 			if event.Delta.Type == "text_delta" {
 				if _, err := fmt.Fprint(out, event.Delta.Text); err != nil {
@@ -165,5 +185,15 @@ func (c *Client) Complete(ctx context.Context, system, user string, w io.Writer)
 		return llm.Usage{}, err
 	}
 
-	return usage, scanner.Err()
+	if err := scanner.Err(); err != nil {
+		return usage, err
+	}
+
+	// Surface truncation instead of silently returning partial text: without
+	// this, a long input would print an incomplete translation and exit 0.
+	if truncated {
+		return usage, fmt.Errorf("output truncated: response hit the %d output-token limit", maxTokens)
+	}
+
+	return usage, nil
 }
diff --git a/internal/provider/anthropic/anthropic_test.go b/internal/provider/anthropic/anthropic_test.go
index d6be027..15c0dd7 100644
--- a/internal/provider/anthropic/anthropic_test.go
+++ b/internal/provider/anthropic/anthropic_test.go
@@ -176,3 +176,68 @@ func TestCompleteRoleSeparation(t *testing.T) {
 
 	_, _ = anthropic.New("k", srv.URL, "").Complete(t.Context(), "my system instruction", "my user text", &sb)
 }
+
+// The API reports output truncation only via message_delta's stop_reason;
+// Complete must surface it as an error instead of returning partial text
+// with a nil error.
+func TestCompleteReturnsErrorOnMaxTokensTruncation(t *testing.T) {
+	const sse = `data: {"type":"content_block_delta","delta":{"type":"text_delta","text":"partial"}}
+
+data: {"type":"message_delta","delta":{"stop_reason":"max_tokens"},"usage":{"output_tokens":8192}}
+
+data: {"type":"message_stop"}
+`
+
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		w.Header().Set("Content-Type", "text/event-stream")
+		_, _ = w.Write([]byte(sse))
+	}))
+	defer srv.Close()
+
+	var sb strings.Builder
+
+	usage, err := anthropic.New("k", srv.URL, "").Complete(t.Context(), "sys", "usr", &sb)
+	if err == nil {
+		t.Fatal("expected truncation error, got nil")
+	}
+
+	if !strings.Contains(err.Error(), "truncated") {
+		t.Errorf("error %q does not mention truncation", err.Error())
+	}
+
+	// The partial text was already streamed to the caller and usage was
+	// collected; both must still be delivered alongside the error.
+	if got, want := sb.String(), "partial\n"; got != want {
+		t.Errorf("output = %q, want %q", got, want)
+	}
+
+	if usage.OutputTokens != 8192 {
+		t.Errorf("OutputTokens = %d, want 8192", usage.OutputTokens)
+	}
+}
+
+// A mid-stream error event arrives after the HTTP status was already 200, so
+// it is the only failure signal; Complete must return it as an error.
+func TestCompleteReturnsErrorOnStreamErrorEvent(t *testing.T) {
+	const sse = `data: {"type":"content_block_delta","delta":{"type":"text_delta","text":"Hi"}}
+
+data: {"type":"error","error":{"type":"overloaded_error","message":"Overloaded"}}
+`
+
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		w.Header().Set("Content-Type", "text/event-stream")
+		_, _ = w.Write([]byte(sse))
+	}))
+	defer srv.Close()
+
+	var sb strings.Builder
+
+	_, err := anthropic.New("k", srv.URL, "").Complete(t.Context(), "sys", "usr", &sb)
+	if err == nil {
+		t.Fatal("expected stream error, got nil")
+	}
+
+	if !strings.Contains(err.Error(), "Overloaded") || !strings.Contains(err.Error(), "overloaded_error") {
+		t.Errorf("error %q does not carry the stream error details", err.Error())
+	}
+}
diff --git a/internal/provider/config.go b/internal/provider/config.go
index 73fbaa2..291a3b1 100644
--- a/internal/provider/config.go
+++ b/internal/provider/config.go
@@ -50,5 +50,12 @@ func (c *Config) ValidateConfig() error {
 		return fmt.Errorf("MINT_API_KEY is required for provider: %s", c.Provider)
 	}
 
+	// A custom endpoint has no meaningful default model (e.g. Ollama would be
+	// sent this provider's cloud default and return a confusing server error),
+	// so fail fast with a clear message instead.
+	if c.BaseURL != "" && c.ModelName == "" {
+		return errors.New("MINT_MODEL_NAME is required when MINT_BASE_URL is set")
+	}
+
 	return nil
 }
diff --git a/internal/provider/config_test.go b/internal/provider/config_test.go
index 6d004fc..f326bb7 100644
--- a/internal/provider/config_test.go
+++ b/internal/provider/config_test.go
@@ -60,9 +60,19 @@ func TestValidateConfig(t *testing.T) {
 		{"valid openai with key", provider.Config{Provider: provider.ProviderOpenAI, APIKey: "k"}, ""},
 		{
 			"valid openai with base url no key (proxy)",
-			provider.Config{Provider: provider.ProviderOpenAI, BaseURL: "http://localhost:11434"},
+			provider.Config{Provider: provider.ProviderOpenAI, BaseURL: "http://localhost:11434", ModelName: "m"},
 			"",
 		},
+		{
+			"base url without model name",
+			provider.Config{Provider: provider.ProviderOpenAI, BaseURL: "http://localhost:11434"},
+			"MINT_MODEL_NAME",
+		},
+		{
+			"base url with key still requires model name",
+			provider.Config{Provider: provider.ProviderAnthropic, APIKey: "k", BaseURL: "http://proxy.example.com"},
+			"MINT_MODEL_NAME",
+		},
 		{"valid google-genai with key", provider.Config{Provider: provider.ProviderGoogleGenAI, APIKey: "k"}, ""},
 		{"valid anthropic with key", provider.Config{Provider: provider.ProviderAnthropic, APIKey: "k"}, ""},
 		{"provider normalized to lowercase", provider.Config{Provider: "OpenAI", APIKey: "k"}, ""},
diff --git a/internal/provider/googlegenai/google_genai.go b/internal/provider/googlegenai/google_genai.go
index be10e13..83a6ba7 100644
--- a/internal/provider/googlegenai/google_genai.go
+++ b/internal/provider/googlegenai/google_genai.go
@@ -8,6 +8,7 @@ import (
 	"bytes"
 	"context"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"io"
 	"net/http"
@@ -74,13 +75,21 @@ type generationConfig struct {
 	Temperature float64 `json:"temperature"`
 }
 
+type apiError struct {
+	Code    int    `json:"code"`
+	Message string `json:"message"`
+	Status  string `json:"status"`
+}
+
 type responseBody struct {
 	Candidates    []candidate   `json:"candidates"`
 	UsageMetadata usageMetadata `json:"usageMetadata"`
+	Error         *apiError     `json:"error"`
 }
 
 type candidate struct {
-	Content content `json:"content"`
+	Content      content `json:"content"`
+	FinishReason string  `json:"finishReason"`
 }
 
 type usageMetadata struct {
@@ -130,7 +139,10 @@ func (c *Client) Complete(ctx context.Context, system, user string, w io.Writer)
 	scanner := bufio.NewScanner(resp.Body)
 	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), maxScanLineBytes)
 
-	var usage llm.Usage
+	var (
+		usage     llm.Usage
+		truncated bool
+	)
 
 	out := llm.NewTrailingNewlineWriter(w)
 
@@ -147,15 +159,27 @@ func (c *Client) Complete(ctx context.Context, system, user string, w io.Writer)
 			continue
 		}
 
+		// Mid-stream error object: the HTTP status was already 200, so this is
+		// the only failure signal we get.
+		if result.Error != nil {
+			return llm.Usage{}, fmt.Errorf("API stream error %d: %s", result.Error.Code, result.Error.Message)
+		}
+
 		// usageMetadata accumulates across chunks; the last value is the total.
 		if result.UsageMetadata.PromptTokenCount > 0 || result.UsageMetadata.CandidatesTokenCount > 0 {
 			usage.InputTokens = result.UsageMetadata.PromptTokenCount
 			usage.OutputTokens = result.UsageMetadata.CandidatesTokenCount
 		}
 
-		if len(result.Candidates) > 0 && len(result.Candidates[0].Content.Parts) > 0 {
-			if _, err := fmt.Fprint(out, result.Candidates[0].Content.Parts[0].Text); err != nil {
-				return llm.Usage{}, err
+		if len(result.Candidates) > 0 {
+			if result.Candidates[0].FinishReason == "MAX_TOKENS" {
+				truncated = true
+			}
+
+			if len(result.Candidates[0].Content.Parts) > 0 {
+				if _, err := fmt.Fprint(out, result.Candidates[0].Content.Parts[0].Text); err != nil {
+					return llm.Usage{}, err
+				}
 			}
 		}
 	}
@@ -164,5 +188,16 @@ func (c *Client) Complete(ctx context.Context, system, user string, w io.Writer)
 		return llm.Usage{}, err
 	}
 
-	return usage, scanner.Err()
+	if err := scanner.Err(); err != nil {
+		return usage, err
+	}
+
+	// Surface truncation instead of silently returning partial text: without
+	// this, a long input would print an incomplete translation and exit 0. No
+	// maxOutputTokens is requested, so the limit hit is the model default.
+	if truncated {
+		return usage, errors.New("output truncated: response hit the model's output-token limit")
+	}
+
+	return usage, nil
 }
diff --git a/internal/provider/googlegenai/google_genai_test.go b/internal/provider/googlegenai/google_genai_test.go
index bb2770a..a53044f 100644
--- a/internal/provider/googlegenai/google_genai_test.go
+++ b/internal/provider/googlegenai/google_genai_test.go
@@ -168,3 +168,65 @@ func TestCompleteRoleSeparation(t *testing.T) {
 
 	_, _ = googlegenai.New("k", srv.URL, "").Complete(t.Context(), "my system instruction", "my user text", &sb)
 }
+
+// A mid-stream error object arrives after the HTTP status was already 200, so
+// it is the only failure signal; Complete must return it as an error instead
+// of silently producing empty output.
+func TestCompleteReturnsErrorOnStreamErrorObject(t *testing.T) {
+	const sse = `data: {"error":{"code":429,"message":"quota exceeded","status":"RESOURCE_EXHAUSTED"}}
+`
+
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		w.Header().Set("Content-Type", "text/event-stream")
+		_, _ = w.Write([]byte(sse))
+	}))
+	defer srv.Close()
+
+	var sb strings.Builder
+
+	_, err := googlegenai.New("k", srv.URL, "").Complete(t.Context(), "sys", "usr", &sb)
+	if err == nil {
+		t.Fatal("expected stream error, got nil")
+	}
+
+	if !strings.Contains(err.Error(), "quota exceeded") || !strings.Contains(err.Error(), "429") {
+		t.Errorf("error %q does not carry the stream error details", err.Error())
+	}
+}
+
+// The API reports output truncation only via the candidate's finishReason;
+// Complete must surface it as an error instead of returning partial text with
+// a nil error.
+func TestCompleteReturnsErrorOnMaxTokensTruncation(t *testing.T) {
+	const sse = `data: {"candidates":[{"content":{"parts":[{"text":"partial"}]}}]}
+
+data: {"candidates":[{"content":{"parts":[]},"finishReason":"MAX_TOKENS"}],"usageMetadata":{"promptTokenCount":3,"candidatesTokenCount":8192}}
+`
+
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		w.Header().Set("Content-Type", "text/event-stream")
+		_, _ = w.Write([]byte(sse))
+	}))
+	defer srv.Close()
+
+	var sb strings.Builder
+
+	usage, err := googlegenai.New("k", srv.URL, "").Complete(t.Context(), "sys", "usr", &sb)
+	if err == nil {
+		t.Fatal("expected truncation error, got nil")
+	}
+
+	if !strings.Contains(err.Error(), "truncated") {
+		t.Errorf("error %q does not mention truncation", err.Error())
+	}
+
+	// The partial text was already streamed to the caller and usage was
+	// collected; both must still be delivered alongside the error.
+	if got, want := sb.String(), "partial\n"; got != want {
+		t.Errorf("output = %q, want %q", got, want)
+	}
+
+	if usage.OutputTokens != 8192 {
+		t.Errorf("OutputTokens = %d, want 8192", usage.OutputTokens)
+	}
+}
diff --git a/internal/provider/openai/openai.go b/internal/provider/openai/openai.go
index a250f54..9e2f974 100644
--- a/internal/provider/openai/openai.go
+++ b/internal/provider/openai/openai.go
@@ -8,6 +8,7 @@ import (
 	"bytes"
 	"context"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"io"
 	"net/http"
@@ -80,12 +81,19 @@ type streamUsage struct {
 }
 
 type streamChoice struct {
-	Delta streamDelta `json:"delta"`
+	Delta        streamDelta `json:"delta"`
+	FinishReason string      `json:"finish_reason"`
+}
+
+type streamError struct {
+	Message string `json:"message"`
+	Type    string `json:"type"`
 }
 
 type streamResponse struct {
 	Choices []streamChoice `json:"choices"`
 	Usage   *streamUsage   `json:"usage"`
+	Error   *streamError   `json:"error"`
 }
 
 // Complete calls the OpenAI API with streaming and writes tokens to w as they arrive.
@@ -137,7 +145,10 @@ func (c *Client) Complete(ctx context.Context, system, user string, w io.Writer)
 	scanner := bufio.NewScanner(resp.Body)
 	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), maxScanLineBytes)
 
-	var usage llm.Usage
+	var (
+		usage     llm.Usage
+		truncated bool
+	)
 
 	out := llm.NewTrailingNewlineWriter(w)
 
@@ -157,12 +168,23 @@ func (c *Client) Complete(ctx context.Context, system, user string, w io.Writer)
 			continue
 		}
 
+		// Mid-stream error object: the HTTP status was already 200, so this is
+		// the only failure signal we get (some proxies and local servers emit
+		// errors this way).
+		if sr.Error != nil {
+			return llm.Usage{}, fmt.Errorf("API stream error: %s", sr.Error.Message)
+		}
+
 		if sr.Usage != nil {
 			usage.InputTokens = sr.Usage.PromptTokens
 			usage.OutputTokens = sr.Usage.CompletionTokens
 		}
 
 		if len(sr.Choices) > 0 {
+			if sr.Choices[0].FinishReason == "length" {
+				truncated = true
+			}
+
 			if _, err := fmt.Fprint(out, sr.Choices[0].Delta.Content); err != nil {
 				return llm.Usage{}, err
 			}
@@ -173,5 +195,16 @@ func (c *Client) Complete(ctx context.Context, system, user string, w io.Writer)
 		return llm.Usage{}, err
 	}
 
-	return usage, scanner.Err()
+	if err := scanner.Err(); err != nil {
+		return usage, err
+	}
+
+	// Surface truncation instead of silently returning partial text: without
+	// this, a long input would print an incomplete translation and exit 0. No
+	// max_tokens is requested, so the limit hit is the server or model default.
+	if truncated {
+		return usage, errors.New("output truncated: response hit the model's output-token limit")
+	}
+
+	return usage, nil
 }
diff --git a/internal/provider/openai/openai_test.go b/internal/provider/openai/openai_test.go
index 274d90c..159653b 100644
--- a/internal/provider/openai/openai_test.go
+++ b/internal/provider/openai/openai_test.go
@@ -247,3 +247,71 @@ data: [DONE]
 		t.Errorf("output = %q, want %q", got, want)
 	}
 }
+
+// A mid-stream error object arrives after the HTTP status was already 200, so
+// it is the only failure signal; Complete must return it as an error instead
+// of silently producing empty output.
+func TestCompleteReturnsErrorOnStreamErrorObject(t *testing.T) {
+	const sse = `data: {"error":{"message":"insufficient quota","type":"insufficient_quota"}}
+
+data: [DONE]
+`
+
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		w.Header().Set("Content-Type", "text/event-stream")
+		_, _ = w.Write([]byte(sse))
+	}))
+	defer srv.Close()
+
+	var sb strings.Builder
+
+	_, err := openai.New("k", srv.URL, "").Complete(t.Context(), "sys", "usr", &sb)
+	if err == nil {
+		t.Fatal("expected stream error, got nil")
+	}
+
+	if !strings.Contains(err.Error(), "insufficient quota") {
+		t.Errorf("error %q does not carry the stream error message", err.Error())
+	}
+}
+
+// The API reports output truncation only via the final chunk's finish_reason;
+// Complete must surface it as an error instead of returning partial text with
+// a nil error.
+func TestCompleteReturnsErrorOnLengthTruncation(t *testing.T) {
+	const sse = `data: {"choices":[{"delta":{"content":"partial"}}]}
+
+data: {"choices":[{"delta":{},"finish_reason":"length"}]}
+
+data: {"choices":[],"usage":{"prompt_tokens":3,"completion_tokens":8192}}
+
+data: [DONE]
+`
+
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		w.Header().Set("Content-Type", "text/event-stream")
+		_, _ = w.Write([]byte(sse))
+	}))
+	defer srv.Close()
+
+	var sb strings.Builder
+
+	usage, err := openai.New("k", srv.URL, "").Complete(t.Context(), "sys", "usr", &sb)
+	if err == nil {
+		t.Fatal("expected truncation error, got nil")
+	}
+
+	if !strings.Contains(err.Error(), "truncated") {
+		t.Errorf("error %q does not mention truncation", err.Error())
+	}
+
+	// The partial text was already streamed to the caller and usage was
+	// collected; both must still be delivered alongside the error.
+	if got, want := sb.String(), "partial\n"; got != want {
+		t.Errorf("output = %q, want %q", got, want)
+	}
+
+	if usage.OutputTokens != 8192 {
+		t.Errorf("OutputTokens = %d, want 8192", usage.OutputTokens)
+	}
+}
diff --git a/npm/mint-ai/scripts/mint b/npm/mint-ai/scripts/mint
index 122796d..f011ce7 100644
--- a/npm/mint-ai/scripts/mint
+++ b/npm/mint-ai/scripts/mint
@@ -2,6 +2,7 @@
 "use strict";
 
 const { spawnSync } = require("child_process");
+const { constants } = require("os");
 const { getBinaryPath } = require("../index.js");
 
 let bin;
@@ -17,4 +18,9 @@ if (result.error) {
   console.error(result.error.message);
   process.exit(1);
 }
+if (result.signal) {
+  // Child killed by a signal: status is null, so report the shell
+  // convention 128+N instead of masking it as success.
+  process.exit(128 + (constants.signals[result.signal] || 0));
+}
 process.exit(result.status ?? 0);
diff --git a/script/install.sh b/script/install.sh
index 9d98acc..1a7dee7 100644
--- a/script/install.sh
+++ b/script/install.sh
@@ -77,9 +77,10 @@ print_path_hint() {
     local shell_config
     # $SHELL is the user's login shell — the right target for a persistent PATH
     # hint (the script itself runs under bash via the curl|bash pipe, so $0 would
-    # be misleading).
+    # be misleading). ${SHELL:-} keeps set -u from aborting in minimal
+    # environments (e.g. containers) where SHELL is unset.
     # shellcheck disable=SC2088  # intentional: ~ is displayed as a hint to the user, not expanded
-    case "${SHELL}" in
+    case "${SHELL:-}" in
         */zsh) shell_config='~/.zshrc' ;;
         # macOS terminals start bash as a login shell, which reads ~/.bash_profile;
         # Linux interactive shells read ~/.bashrc.
@@ -96,7 +97,7 @@ print_path_hint() {
 
     warn "${INSTALL_DIR} is not in your PATH"
     printf "\n"
-    case "${SHELL}" in
+    case "${SHELL:-}" in
         */fish)
             printf "  Run the following to add it to your PATH:\n"
             printf "\n"