From c2513e3bd76bdff49c32ae37f1b7c188caa6bd8a Mon Sep 17 00:00:00 2001 From: Joachim Rosskopf Date: Mon, 25 May 2026 11:41:09 +0200 Subject: [PATCH] fix(self-packaging #62): bundled-mode HTTP serving end-to-end Before this change, `flapi pack` produced a working binary at the CLI level (`info`, `unpack`, etc.) but `./flapi-bundled` started from a clean cwd crashed with "Could not open file: flapi.yaml" -- the EmbeddedArchiveFileProvider was registered globally but the config / endpoint / template loaders bypassed it via std::ifstream and std::filesystem::recursive_directory_iterator. Four call sites had to be routed through FileProviderFactory so the in-memory archive is actually consulted when SetBundleContents has fired: - extended_yaml_parser: parseFile and the static loadYamlFile now read via FileProviderFactory::CreateProvider(path)->ReadFile(...) instead of std::ifstream. resolveIncludePath swaps std::filesystem::exists for a provider-aware ConfigFileExists so {{include from ...}} directives find their targets inside the bundle. - config_manager::parseTemplateConfig: in bundled mode, skip the std::filesystem::absolute() step -- the template path is a bundle key like "sqls", not a filesystem path; turning it into "/cwd/sqls" prevents the embedded provider from finding it. - config_manager::loadEndpointConfigsRecursively: in bundled mode, enumerate .yaml/.yml entries by prefix-scanning the bundle map instead of std::filesystem::recursive_directory_iterator. The EmbeddedArchiveFileProvider's ListFiles is non-recursive by contract, so the walk is inlined here. - config_manager::getFileProvider: in bundled mode, return a fresh provider from FileProviderFactory so SQLTemplateProcessor reads SQL templates from the bundle rather than the LocalFileProvider baked into ConfigLoader. Plus a small fix in sql_template_processor::getFullTemplatePath: when the endpoint parser has already resolved templateSource against its YAML's parent dir (which in bundled mode produces a full bundle key like "sqls/hello.sql"), don't re-prepend the basePath. Mirrors the existing absolute-path short-circuit just above. test_self_packaging_http.py adds the three pytest cases from the issue acceptance: - #1 unbundled flapi serves /hello with 200 + body - #3 bundled flapi from clean cwd (with the source tree deleted) serves the same /hello - #9 a SQL template using read_csv('embed://data/cities.csv') returns the bundled CSV rows over HTTP All 17 integration tests pass (existing CLI-level self-packaging suite + env-override suite + the 3 new HTTP cases). 637/637 C++ unit tests pass. Closes #62. --- src/config_manager.cpp | 55 +++- src/extended_yaml_parser.cpp | 57 ++-- src/sql_template_processor.cpp | 17 ++ test/integration/test_self_packaging_http.py | 268 +++++++++++++++++++ 4 files changed, 376 insertions(+), 21 deletions(-) create mode 100644 test/integration/test_self_packaging_http.py diff --git a/src/config_manager.cpp b/src/config_manager.cpp index 20b9b20..c32a83f 100644 --- a/src/config_manager.cpp +++ b/src/config_manager.cpp @@ -149,7 +149,16 @@ void ConfigManager::parseTemplateConfig() { if (template_node) { template_config.path = template_node["path"].as(); std::filesystem::path template_path_relative(template_config.path); - template_config.path = std::filesystem::absolute((base_path / template_path_relative).lexically_normal()).string(); + std::filesystem::path joined = (base_path / template_path_relative).lexically_normal(); + // In bundled mode the template path is a bundle-relative key + // (e.g. "sqls"), not a filesystem path. Calling absolute() would + // turn it into "/cwd/sqls" which the EmbeddedArchiveFileProvider + // would fail to look up. + if (FileProviderFactory::GetBundleContents() != nullptr) { + template_config.path = joined.string(); + } else { + template_config.path = std::filesystem::absolute(joined).string(); + } CROW_LOG_DEBUG << "Template Path: " << template_config.path; if (template_node["environment-whitelist"]) { @@ -430,7 +439,40 @@ void ConfigManager::loadEndpointConfigsRecursively(const std::filesystem::path& size_t total_yaml_files = 0; size_t loaded_endpoints = 0; - if (std::filesystem::exists(template_path) && std::filesystem::is_directory(template_path)) { + // Bundled mode: enumerate endpoint YAMLs by prefix-matching the + // in-memory archive. EmbeddedArchiveFileProvider::ListFiles is + // non-recursive on purpose, so we walk the bundle directly here. + if (auto bundle = FileProviderFactory::GetBundleContents(); + bundle != nullptr) { + std::string prefix = template_path.string(); + // Normalise to a key shape that matches archive entries (no + // leading "./", no leading "/", a single trailing "/"). + while (prefix.size() >= 2 && prefix[0] == '.' && prefix[1] == '/') { + prefix.erase(0, 2); + } + if (!prefix.empty() && prefix.front() == '/') { + prefix.erase(0, 1); + } + if (!prefix.empty() && prefix.back() != '/') { + prefix += '/'; + } + for (const auto& [name, _data] : *bundle) { + if (!prefix.empty() && name.rfind(prefix, 0) != 0) { + continue; + } + const std::filesystem::path entry_path(name); + const auto extension = entry_path.extension(); + if (extension != ".yaml" && extension != ".yml") { + continue; + } + total_yaml_files++; + size_t endpoints_before = endpoints.size(); + loadEndpointConfig(name); + if (endpoints.size() > endpoints_before) { + loaded_endpoints++; + } + } + } else if (std::filesystem::exists(template_path) && std::filesystem::is_directory(template_path)) { for (const auto& entry : std::filesystem::recursive_directory_iterator(template_path)) { if (entry.is_regular_file()) { auto extension = entry.path().extension(); @@ -1198,6 +1240,15 @@ std::shared_ptr ConfigManager::getFileProvider() const { return std::make_shared(base_provider, cache_config); } + // Bundled mode (#62): templates live in the in-memory archive, not on + // disk. Route through the factory so SQLTemplateProcessor gets an + // EmbeddedArchiveFileProvider when SetBundleContents has been called. + // Falls back to LocalFileProvider when no bundle is active, which + // matches the pre-bundled behaviour. + if (FileProviderFactory::GetBundleContents() != nullptr) { + return FileProviderFactory::CreateProvider(template_config.path); + } + return config_loader->getFileProvider(); } std::string ConfigManager::getCacheSchema() const { return cache_schema; } diff --git a/src/extended_yaml_parser.cpp b/src/extended_yaml_parser.cpp index 40ff691..806eb23 100644 --- a/src/extended_yaml_parser.cpp +++ b/src/extended_yaml_parser.cpp @@ -1,4 +1,5 @@ #include "include/extended_yaml_parser.hpp" +#include "include/vfs_adapter.hpp" #include #include #include @@ -15,6 +16,34 @@ namespace flapi { +namespace { + +// Read a file through FileProviderFactory so bundled-mode startup +// (`flapi pack` then run from a clean cwd) can resolve `flapi.yaml` +// and any `{{include from ...}}` references against the in-memory +// archive instead of std::ifstream. When no bundle is active the +// factory returns a LocalFileProvider, so the unbundled path is +// behaviourally unchanged. +std::string ReadConfigFile(const std::filesystem::path& file_path) { + const std::string path_str = file_path.string(); + auto provider = FileProviderFactory::CreateProvider(path_str); + if (!provider->FileExists(path_str)) { + throw std::runtime_error("Could not open file: " + path_str); + } + return provider->ReadFile(path_str); +} + +// Provider-aware existence check used by include resolution. When a bundle +// is active, std::filesystem::exists would always say "no" for entries +// that live only in the in-memory archive. +bool ConfigFileExists(const std::filesystem::path& file_path) { + const std::string path_str = file_path.string(); + auto provider = FileProviderFactory::CreateProvider(path_str); + return provider->FileExists(path_str); +} + +} // namespace + // IncludeConfig implementation bool ExtendedYamlParser::IncludeConfig::isEnvironmentVariableAllowed(const std::string& var_name) const { if (environment_whitelist.empty()) { @@ -50,18 +79,15 @@ ExtendedYamlParser::ParseResult ExtendedYamlParser::parseFile(const std::filesys actual_base_path = file_path.parent_path(); } - // Load file content - std::ifstream file(file_path); - if (!file.is_open()) { + std::string content; + try { + content = ReadConfigFile(file_path); + } catch (const std::exception& e) { result.success = false; - result.error_message = "Could not open file: " + file_path.string(); + result.error_message = e.what(); return result; } - std::stringstream buffer; - buffer << file.rdbuf(); - std::string content = buffer.str(); - // Track included files for circular dependency detection std::unordered_set included_files; included_files.insert(std::filesystem::absolute(file_path).string()); @@ -573,12 +599,12 @@ bool ExtendedYamlParser::resolveIncludePath(const std::filesystem::path& include const std::vector& include_paths) { // First try relative to base path resolved_path = base_path / include_path; - if (std::filesystem::exists(resolved_path)) { + if (ConfigFileExists(resolved_path)) { return true; } // Try absolute path - if (include_path.is_absolute() && std::filesystem::exists(include_path)) { + if (include_path.is_absolute() && ConfigFileExists(include_path)) { resolved_path = include_path; return true; } @@ -586,7 +612,7 @@ bool ExtendedYamlParser::resolveIncludePath(const std::filesystem::path& include // Try include paths for (const auto& include_base : include_paths) { resolved_path = std::filesystem::path(include_base) / include_path; - if (std::filesystem::exists(resolved_path)) { + if (ConfigFileExists(resolved_path)) { return true; } } @@ -595,14 +621,7 @@ bool ExtendedYamlParser::resolveIncludePath(const std::filesystem::path& include } YAML::Node ExtendedYamlParser::loadYamlFile(const std::filesystem::path& file_path) { - std::ifstream file(file_path); - if (!file.is_open()) { - throw std::runtime_error("Could not open file: " + file_path.string()); - } - - std::stringstream buffer; - buffer << file.rdbuf(); - return YAML::Load(buffer.str()); + return YAML::Load(ReadConfigFile(file_path)); } YAML::Node ExtendedYamlParser::extractSection(const YAML::Node& node, const std::string& section_name) { diff --git a/src/sql_template_processor.cpp b/src/sql_template_processor.cpp index 668e78f..fa1475b 100644 --- a/src/sql_template_processor.cpp +++ b/src/sql_template_processor.cpp @@ -95,6 +95,23 @@ std::string SQLTemplateProcessor::getFullTemplatePath(const std::string& templat return basePath + templateSource; } + // In bundled mode the endpoint parser already resolved templateSource + // against the YAML's parent directory, so it carries the full bundle + // key (e.g. "sqls/hello.sql"). Joining with the basePath ("sqls") + // would produce "sqls/sqls/hello.sql". Mirror the absolute-path + // short-circuit above: if templateSource already starts with the + // basePath prefix, return it as-is. + { + std::string base_with_sep = basePath; + if (!base_with_sep.empty() && base_with_sep.back() != '/') { + base_with_sep += '/'; + } + if (!base_with_sep.empty() && + templateSource.compare(0, base_with_sep.size(), base_with_sep) == 0) { + return templateSource; + } + } + // Local path resolution std::filesystem::path fullPath = std::filesystem::path(basePath) / templateSource; return fullPath.string(); diff --git a/test/integration/test_self_packaging_http.py b/test/integration/test_self_packaging_http.py new file mode 100644 index 0000000..8c27e28 --- /dev/null +++ b/test/integration/test_self_packaging_http.py @@ -0,0 +1,268 @@ +"""End-to-end HTTP tests for bundled-mode self-packaging (issue #62). + +Covers the three behaviours that were explicitly deferred from PR #56: + + Behaviour #1 -- unbundled flapi against a fixture config dir serves /hello + with the expected JSON body. + Behaviour #3 -- the same fixture packed via `flapi pack` and started + from a clean cwd (the source tree no longer reachable) + serves /hello with the same body. + Behaviour #9 -- a SQL template that calls read_csv('embed://...') resolves + through the EmbeddedFileSystem DuckDB VFS and returns the + expected rows over the REST surface. + +Behaviour #2/#4/#5/#6/#7/#8 are covered at the CLI level by +test_self_packaging.py; this file is HTTP-only. +""" + +from __future__ import annotations + +import os +import pathlib +import socket +import subprocess +import sys +import time +from typing import List + +import pytest +import urllib.request + +sys.path.insert(0, str(pathlib.Path(__file__).parent)) +from conftest import get_flapi_binary # noqa: E402 + + +pytestmark = pytest.mark.standalone_server + + +def _flapi() -> pathlib.Path: + return get_flapi_binary() + + +def _pick_free_port() -> int: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind(("127.0.0.1", 0)) + return s.getsockname()[1] + + +def _wait_for_bind(host: str, port: int, timeout_s: float = 30.0) -> bool: + deadline = time.monotonic() + timeout_s + while time.monotonic() < deadline: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.settimeout(0.5) + try: + s.connect((host, port)) + return True + except OSError: + time.sleep(0.2) + return False + + +def _write_minimal_fixture(root: pathlib.Path) -> None: + """A small but functional config tree exercising both #1/#3 (a plain + /hello endpoint) and #9 (read_csv from embed://data/cities.csv). + + The endpoint uses inline VALUES for /hello so the success path doesn't + depend on read_csv at all; a separate /cities endpoint reads from + embed:// to specifically verify behaviour #9. + """ + (root / "sqls").mkdir(parents=True, exist_ok=True) + (root / "data").mkdir(parents=True, exist_ok=True) + + (root / "flapi.yaml").write_text( + "project-name: bundled-http-test\n" + "project-description: integration test fixture for issue #62\n" + "template:\n" + " path: ./sqls\n" + "connections: {}\n" + "duckdb:\n" + " access_mode: READ_WRITE\n" + " threads: 1\n" + " max_memory: 256MB\n" + ) + + (root / "sqls" / "hello.yaml").write_text( + "url-path: /hello\n" + "method: GET\n" + "template-source: hello.sql\n" + ) + # No trailing semicolon -- flapi wraps the user query in + # `SELECT * FROM (...)` for pagination, so the inner statement must + # be a bare SELECT expression. + (root / "sqls" / "hello.sql").write_text( + "SELECT 'world' AS greeting\n" + ) + + (root / "sqls" / "cities.yaml").write_text( + "url-path: /cities\n" + "method: GET\n" + "template-source: cities.sql\n" + ) + (root / "sqls" / "cities.sql").write_text( + "SELECT * FROM read_csv('embed://data/cities.csv', header=true) " + "ORDER BY name\n" + ) + + (root / "data" / "cities.csv").write_text( + "name,country\n" + "Berlin,DE\n" + "Karlsruhe,DE\n" + "Vienna,AT\n" + ) + + +def _start_server( + args: List[str], cwd: pathlib.Path, port: int +) -> subprocess.Popen: + env = {k: v for k, v in os.environ.items() if not k.startswith("FLAPI_")} + # NOTE: FLAPI_PORT lands in a separate PR (#63); for now we pass --port + # on the CLI so this suite works against main and against PR #63 alike. + cmd = list(args) + ["--port", str(port)] + return subprocess.Popen( + cmd, + cwd=str(cwd), + env=env, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + ) + + +def _stop(proc: subprocess.Popen) -> None: + proc.terminate() + try: + proc.wait(timeout=5) + except subprocess.TimeoutExpired: + proc.kill() + proc.wait(timeout=5) + + +def _http_json(port: int, path: str): + req = urllib.request.Request( + f"http://127.0.0.1:{port}{path}", + headers={"Accept": "application/json"}, + ) + with urllib.request.urlopen(req, timeout=10) as resp: + assert resp.status == 200, f"unexpected status {resp.status}" + import json + return json.loads(resp.read()) + + +def _read_log(proc: subprocess.Popen, max_chars: int = 2000) -> str: + if proc.stdout is None: + return "" + out = "" + try: + out = proc.stdout.read() or "" + except Exception: + pass + return out[:max_chars] + + +# --- Behaviour #1 ------------------------------------------------------------ + +def test_unbundled_server_serves_hello(tmp_path: pathlib.Path): + fixture = tmp_path / "fixture" + _write_minimal_fixture(fixture) + port = _pick_free_port() + + proc = _start_server( + [str(_flapi()), "-c", "flapi.yaml"], + cwd=fixture, + port=port, + ) + try: + assert _wait_for_bind("127.0.0.1", port), ( + f"unbundled server failed to bind on {port}; log:\n{_read_log(proc)}" + ) + body = _http_json(port, "/hello") + finally: + _stop(proc) + + rows = body.get("data", []) if isinstance(body, dict) else body + assert isinstance(rows, list) and len(rows) == 1, body + assert rows[0].get("greeting") == "world", body + + +# --- Behaviour #3 ------------------------------------------------------------ + +def test_bundled_server_serves_hello_from_clean_cwd(tmp_path: pathlib.Path): + fixture = tmp_path / "fixture" + _write_minimal_fixture(fixture) + + bundled = tmp_path / "flapi-bundled" + pack_res = subprocess.run( + [str(_flapi()), "pack", "--in", str(fixture), "--out", str(bundled)], + check=False, + capture_output=True, + text=True, + timeout=60, + ) + assert pack_res.returncode == 0, ( + f"flapi pack failed: stdout={pack_res.stdout} stderr={pack_res.stderr}" + ) + assert bundled.exists(), f"pack did not produce {bundled}" + + # Delete the source tree to prove the bundled binary doesn't reach + # back into it for any file it serves. + import shutil + shutil.rmtree(fixture) + + clean_cwd = tmp_path / "clean_cwd" + clean_cwd.mkdir() + port = _pick_free_port() + + proc = _start_server([str(bundled)], cwd=clean_cwd, port=port) + try: + assert _wait_for_bind("127.0.0.1", port), ( + f"bundled server failed to bind on {port}; log:\n{_read_log(proc)}" + ) + body = _http_json(port, "/hello") + finally: + _stop(proc) + + rows = body.get("data", []) if isinstance(body, dict) else body + assert isinstance(rows, list) and len(rows) == 1, body + assert rows[0].get("greeting") == "world", body + + +# --- Behaviour #9 ------------------------------------------------------------ + +def test_bundled_server_read_csv_via_embed_scheme(tmp_path: pathlib.Path): + fixture = tmp_path / "fixture" + _write_minimal_fixture(fixture) + + bundled = tmp_path / "flapi-bundled" + pack_res = subprocess.run( + [str(_flapi()), "pack", "--in", str(fixture), "--out", str(bundled)], + check=False, + capture_output=True, + text=True, + timeout=60, + ) + assert pack_res.returncode == 0, ( + f"flapi pack failed: stdout={pack_res.stdout} stderr={pack_res.stderr}" + ) + + import shutil + shutil.rmtree(fixture) + + clean_cwd = tmp_path / "clean_cwd" + clean_cwd.mkdir() + port = _pick_free_port() + + proc = _start_server([str(bundled)], cwd=clean_cwd, port=port) + try: + assert _wait_for_bind("127.0.0.1", port), ( + f"bundled server failed to bind; log:\n{_read_log(proc)}" + ) + body = _http_json(port, "/cities") + finally: + _stop(proc) + + rows = body.get("data", []) if isinstance(body, dict) else body + assert isinstance(rows, list) and len(rows) == 3, body + names = [row.get("name") for row in rows] + assert names == ["Berlin", "Karlsruhe", "Vienna"], body + assert rows[0].get("country") == "DE" + assert rows[2].get("country") == "AT"