Skip to content

Commit 8939cca

Browse files
fix(uploads): enforce streaming upload limits in gateway (#2589)
* fix: enforce gateway upload limits * fix: acquire sandbox before upload writes * Fix upload limit config wiring * Sanitize upload size error filenames * test: call upload routes unwrapped * fix: guard upload limits endpoint --------- Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
1 parent 83938cf commit 8939cca

4 files changed

Lines changed: 393 additions & 14 deletions

File tree

backend/app/gateway/routers/uploads.py

Lines changed: 114 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,11 @@
3030

3131
router = APIRouter(prefix="/api/threads/{thread_id}/uploads", tags=["uploads"])
3232

33+
# Size in bytes of each chunk read from an incoming UploadFile while
# streaming it to disk (keeps memory bounded regardless of file size).
UPLOAD_CHUNK_SIZE = 8192
# Fallback limits used when the corresponding `uploads.*` config keys are
# absent or invalid; see _get_upload_limits().
DEFAULT_MAX_FILES = 10
DEFAULT_MAX_FILE_SIZE = 50 * 1024 * 1024
DEFAULT_MAX_TOTAL_SIZE = 100 * 1024 * 1024
37+
3338

3439
class UploadResponse(BaseModel):
3540
"""Response model for file upload."""
@@ -39,6 +44,14 @@ class UploadResponse(BaseModel):
3944
message: str
4045

4146

47+
class UploadLimits(BaseModel):
    """Application-level upload limits exposed to clients."""

    # Maximum number of files accepted in a single upload request.
    max_files: int
    # Maximum size in bytes of any single uploaded file.
    max_file_size: int
    # Maximum combined size in bytes of all files in one request.
    max_total_size: int
53+
54+
4255
def _make_file_sandbox_writable(file_path: os.PathLike[str] | str) -> None:
4356
"""Ensure uploaded files remain writable when mounted into non-local sandboxes.
4457
@@ -69,6 +82,62 @@ def _get_uploads_config_value(app_config: AppConfig, key: str, default: object)
6982
return getattr(uploads_cfg, key, default)
7083

7184

85+
def _get_upload_limit(app_config: AppConfig, key: str, default: int, *, legacy_key: str | None = None) -> int:
    """Resolve one positive-integer upload limit from application config.

    Looks up ``uploads.<key>``, then ``uploads.<legacy_key>`` (when given,
    for backward-compatible config names), and finally falls back to
    *default*. Non-integer or non-positive configured values are rejected
    with a warning and *default* is returned.

    Args:
        app_config: Application configuration object.
        key: Primary config key under the ``uploads`` section.
        default: Fallback value; must itself be a positive int.
        legacy_key: Optional deprecated key consulted when *key* is unset.

    Returns:
        The configured limit, or *default* when unset/invalid.
    """
    try:
        value = _get_uploads_config_value(app_config, key, None)
        if value is None and legacy_key is not None:
            value = _get_uploads_config_value(app_config, legacy_key, None)
        if value is None:
            value = default
        limit = int(value)
        if limit <= 0:
            raise ValueError(f"uploads.{key} must be positive, got {limit}")
        return limit
    # Catch only conversion/validation failures: int() raises TypeError or
    # ValueError, and the positivity check raises ValueError. A blanket
    # `except Exception` here would silently hide unrelated bugs behind the
    # default value.
    except (TypeError, ValueError):
        logger.warning("Invalid uploads.%s value; falling back to %d", key, default)
        return default
99+
100+
101+
def _get_upload_limits(app_config: AppConfig) -> UploadLimits:
    """Assemble the effective upload limits from application config.

    Each limit falls back to its module-level default; two of them also
    honour legacy config key names for backward compatibility.
    """
    max_files = _get_upload_limit(app_config, "max_files", DEFAULT_MAX_FILES, legacy_key="max_file_count")
    max_file_size = _get_upload_limit(
        app_config, "max_file_size", DEFAULT_MAX_FILE_SIZE, legacy_key="max_single_file_size"
    )
    max_total_size = _get_upload_limit(app_config, "max_total_size", DEFAULT_MAX_TOTAL_SIZE)
    return UploadLimits(max_files=max_files, max_file_size=max_file_size, max_total_size=max_total_size)
107+
108+
109+
def _cleanup_uploaded_paths(paths: list[os.PathLike[str] | str]) -> None:
    """Best-effort removal of files written before a request was rejected.

    Paths are deleted in reverse creation order. Files that no longer
    exist are ignored; any other deletion failure is logged and swallowed
    so cleanup never masks the original rejection.
    """
    for stale_path in paths[::-1]:
        try:
            os.unlink(stale_path)
        except FileNotFoundError:
            # Already gone (e.g. never fully written) — nothing to do.
            pass
        except Exception:
            logger.warning("Failed to clean up upload path after rejected request: %s", stale_path, exc_info=True)
117+
118+
119+
async def _write_upload_file_streaming(
    file: UploadFile,
    file_path: os.PathLike[str] | str,
    *,
    display_filename: str,
    max_single_file_size: int,
    max_total_size: int,
    total_size: int,
) -> tuple[int, int]:
    """Stream an upload to disk while enforcing per-file and total caps.

    Reads the file in UPLOAD_CHUNK_SIZE chunks so large uploads are never
    buffered whole in memory, and raises HTTP 413 as soon as either the
    single-file or the running request total exceeds its limit. A partial
    file may remain on disk after a 413; the caller is responsible for
    cleaning it up.

    Returns:
        A ``(file_size, total_size)`` tuple, where *total_size* is the
        updated running total for the request.
    """
    bytes_written = 0
    with open(file_path, "wb") as output:
        while True:
            chunk = await file.read(UPLOAD_CHUNK_SIZE)
            if not chunk:
                break
            chunk_len = len(chunk)
            bytes_written += chunk_len
            total_size += chunk_len
            # Validate before committing the chunk so oversized uploads
            # are rejected at the earliest possible point.
            if bytes_written > max_single_file_size:
                raise HTTPException(status_code=413, detail=f"File too large: {display_filename}")
            if total_size > max_total_size:
                raise HTTPException(status_code=413, detail="Total upload size too large")
            output.write(chunk)
    return bytes_written, total_size
139+
140+
72141
def _auto_convert_documents_enabled(app_config: AppConfig) -> bool:
73142
"""Return whether automatic host-side document conversion is enabled.
74143
@@ -96,19 +165,28 @@ async def upload_files(
96165
if not files:
97166
raise HTTPException(status_code=400, detail="No files provided")
98167

168+
limits = _get_upload_limits(config)
169+
if len(files) > limits.max_files:
170+
raise HTTPException(status_code=413, detail=f"Too many files: maximum is {limits.max_files}")
171+
99172
try:
100173
uploads_dir = ensure_uploads_dir(thread_id)
101174
except ValueError as e:
102175
raise HTTPException(status_code=400, detail=str(e))
103176
sandbox_uploads = get_paths().sandbox_uploads_dir(thread_id, user_id=get_effective_user_id())
104177
uploaded_files = []
178+
written_paths = []
179+
sandbox_sync_targets = []
180+
total_size = 0
105181

106182
sandbox_provider = get_sandbox_provider()
107183
sync_to_sandbox = not _uses_thread_data_mounts(sandbox_provider)
108184
sandbox = None
109185
if sync_to_sandbox:
110186
sandbox_id = sandbox_provider.acquire(thread_id)
111187
sandbox = sandbox_provider.get(sandbox_id)
188+
if sandbox is None:
189+
raise HTTPException(status_code=500, detail="Failed to acquire sandbox")
112190
auto_convert_documents = _auto_convert_documents_enabled(config)
113191

114192
for file in files:
@@ -122,35 +200,41 @@ async def upload_files(
122200
continue
123201

124202
try:
125-
content = await file.read()
126203
file_path = uploads_dir / safe_filename
127-
file_path.write_bytes(content)
204+
written_paths.append(file_path)
205+
file_size, total_size = await _write_upload_file_streaming(
206+
file,
207+
file_path,
208+
display_filename=safe_filename,
209+
max_single_file_size=limits.max_file_size,
210+
max_total_size=limits.max_total_size,
211+
total_size=total_size,
212+
)
128213

129214
virtual_path = upload_virtual_path(safe_filename)
130215

131-
if sync_to_sandbox and sandbox is not None:
132-
_make_file_sandbox_writable(file_path)
133-
sandbox.update_file(virtual_path, content)
216+
if sync_to_sandbox:
217+
sandbox_sync_targets.append((file_path, virtual_path))
134218

135219
file_info = {
136220
"filename": safe_filename,
137-
"size": str(len(content)),
221+
"size": str(file_size),
138222
"path": str(sandbox_uploads / safe_filename),
139223
"virtual_path": virtual_path,
140224
"artifact_url": upload_artifact_url(thread_id, safe_filename),
141225
}
142226

143-
logger.info(f"Saved file: {safe_filename} ({len(content)} bytes) to {file_info['path']}")
227+
logger.info(f"Saved file: {safe_filename} ({file_size} bytes) to {file_info['path']}")
144228

145229
file_ext = file_path.suffix.lower()
146230
if auto_convert_documents and file_ext in CONVERTIBLE_EXTENSIONS:
147231
md_path = await convert_file_to_markdown(file_path)
148232
if md_path:
233+
written_paths.append(md_path)
149234
md_virtual_path = upload_virtual_path(md_path.name)
150235

151-
if sync_to_sandbox and sandbox is not None:
152-
_make_file_sandbox_writable(md_path)
153-
sandbox.update_file(md_virtual_path, md_path.read_bytes())
236+
if sync_to_sandbox:
237+
sandbox_sync_targets.append((md_path, md_virtual_path))
154238

155239
file_info["markdown_file"] = md_path.name
156240
file_info["markdown_path"] = str(sandbox_uploads / md_path.name)
@@ -159,17 +243,37 @@ async def upload_files(
159243

160244
uploaded_files.append(file_info)
161245

246+
except HTTPException as e:
247+
_cleanup_uploaded_paths(written_paths)
248+
raise e
162249
except Exception as e:
163250
logger.error(f"Failed to upload {file.filename}: {e}")
251+
_cleanup_uploaded_paths(written_paths)
164252
raise HTTPException(status_code=500, detail=f"Failed to upload {file.filename}: {str(e)}")
165253

254+
if sync_to_sandbox:
255+
for file_path, virtual_path in sandbox_sync_targets:
256+
_make_file_sandbox_writable(file_path)
257+
sandbox.update_file(virtual_path, file_path.read_bytes())
258+
166259
return UploadResponse(
167260
success=True,
168261
files=uploaded_files,
169262
message=f"Successfully uploaded {len(uploaded_files)} file(s)",
170263
)
171264

172265

266+
@router.get("/limits", response_model=UploadLimits)
@require_permission("threads", "read", owner_check=True)
async def get_upload_limits(
    thread_id: str,
    request: Request,
    config: AppConfig = Depends(get_config),
) -> UploadLimits:
    """Return upload limits used by the gateway for this thread.

    Exposed so clients can validate file selections before attempting an
    upload that would be rejected with 413. Requires "threads" read
    permission with the owner check applied; the limits are resolved from
    app config with module-level defaults as fallback.
    """
    return _get_upload_limits(config)
275+
276+
173277
@router.get("/list", response_model=dict)
174278
@require_permission("threads", "read", owner_check=True)
175279
async def list_uploaded_files(thread_id: str, request: Request) -> dict:

backend/docs/FILE_UPLOAD.md

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ POST /api/threads/{thread_id}/uploads
2222
**请求体:** `multipart/form-data`
2323
- `files`: 一个或多个文件
2424

25+
网关会在应用层限制上传规模,默认最多 10 个文件、单文件 50 MiB、单次请求总计 100 MiB。可通过 `config.yaml` 中的 `uploads.max_files`、`uploads.max_file_size` 和 `uploads.max_total_size` 调整;前端会读取同一组限制并在选择文件时提示,超过限制时后端返回 `413 Payload Too Large`。
26+
2527
**响应:**
2628
```json
2729
{
@@ -48,7 +50,23 @@ POST /api/threads/{thread_id}/uploads
4850
- `virtual_path`: Agent 在沙箱中使用的虚拟路径
4951
- `artifact_url`: 前端通过 HTTP 访问文件的 URL
5052

51-
### 2. 列出已上传文件
53+
### 2. 查询上传限制
54+
```
55+
GET /api/threads/{thread_id}/uploads/limits
56+
```
57+
58+
返回网关当前生效的上传限制,供前端在用户选择文件前提示和拦截。
59+
60+
**响应:**
61+
```json
62+
{
63+
"max_files": 10,
64+
"max_file_size": 52428800,
65+
"max_total_size": 104857600
66+
}
67+
```
68+
69+
### 3. 列出已上传文件
5270
```
5371
GET /api/threads/{thread_id}/uploads/list
5472
```
@@ -71,7 +89,7 @@ GET /api/threads/{thread_id}/uploads/list
7189
}
7290
```
7391

74-
### 3. 删除文件
92+
### 4. 删除文件
7593
```
7694
DELETE /api/threads/{thread_id}/uploads/{filename}
7795
```

0 commit comments

Comments
 (0)