diff --git a/doc/manual/source/protocols/nix-cache-info.md b/doc/manual/source/protocols/nix-cache-info.md index e8351e1cebe8..608e0e0000dd 100644 --- a/doc/manual/source/protocols/nix-cache-info.md +++ b/doc/manual/source/protocols/nix-cache-info.md @@ -36,12 +36,44 @@ error: binary cache 'https://example.com' is for Nix stores with prefix '/nix/st Integer. Sets the default for [`priority`](@docroot@/store/types/http-binary-cache-store.md#store-http-binary-cache-store-priority). +### `GetNarInfosV1` + +The path (relative to the cache URL) of an endpoint for fetching the +metadata of multiple store paths in a single request. If this field is +present, a client may send a `POST` request to this path whose body is +a list of store path hash parts (one per line). The server responds +with one JSON object per line (newline-delimited JSON), each being the +[version 2 JSON representation](@docroot@/protocols/json/store-object-info.md) +of a store path's metadata, extended with a `"path"` field holding the +store path it describes. This lets a client fetch the metadata for many +paths in one request instead of one request per path. A path whose +object is absent from the response is not available in the cache. + +Each object may also contain a `"partialClosure"` field: an array of +store paths that are (some of) the path's indirect references. This is +a hint that lets a client start fetching the metadata of an entire +closure without waiting for the intervening objects; it need not be +complete and is not covered by the signature. + +If the field is absent, the client falls back to fetching each +`.narinfo` individually. + +## Other fields + +Any field not listed above is stored verbatim in the client's +[on-disk cache](#caching-behavior) of `nix-cache-info` but is otherwise +ignored. This keeps the format forward-compatible: a newer server can +advertise a capability that an older client persists, and a later +client version can start using it without the server's metadata having +to be re-fetched. + ## Example -``` +```text StoreDir: /nix/store WantMassQuery: 1 Priority: 30 +GetNarInfosV1: /get-narinfos-v1 ``` ## Caching Behavior diff --git a/src/libstore/binary-cache-store.cc b/src/libstore/binary-cache-store.cc index 92d743a16439..24028125159b 100644 --- a/src/libstore/binary-cache-store.cc +++ b/src/libstore/binary-cache-store.cc @@ -86,6 +86,8 @@ void BinaryCacheStore::applyCacheInfoFields(const std::map(*value)) config.priority.setDefault(*priority); } + if (auto * endpoint = get(fields, "GetNarInfosV1")) + getNarInfosV1 = *endpoint; } void BinaryCacheStore::init() diff --git a/src/libstore/filetransfer.cc b/src/libstore/filetransfer.cc index b7ef57346956..d4bf7a6cb2e5 100644 --- a/src/libstore/filetransfer.cc +++ b/src/libstore/filetransfer.cc @@ -384,7 +384,8 @@ struct curlFileTransfer : public FileTransfer *logger, lvlTalkative, actFileTransfer, - fmt("%s '%s'", request.verb(/*continuous=*/true), request.uri), + request.activityText ? *request.activityText + : fmt("%s '%s'", request.verb(/*continuous=*/true), request.uri), Logger::Fields{request.uri.to_string()}, request.parentAct); // Reset the start time to when we actually started the download. diff --git a/src/libstore/http-binary-cache-store.cc b/src/libstore/http-binary-cache-store.cc index 7a2f8b8b6d56..5cc29e86dd37 100644 --- a/src/libstore/http-binary-cache-store.cc +++ b/src/libstore/http-binary-cache-store.cc @@ -1,6 +1,7 @@ #include "nix/store/http-binary-cache-store.hh" #include "nix/store/filetransfer.hh" #include "nix/store/globals.hh" +#include "nix/store/nar-info.hh" #include "nix/store/nar-info-disk-cache.hh" #include "nix/store/sqlite.hh" #include "nix/util/callback.hh" @@ -8,6 +9,9 @@ #include "nix/store/store-registration.hh" #include "nix/store/globals.hh" #include "nix/util/topo-sort.hh" +#include "nix/util/strings.hh" + +#include namespace nix { @@ -302,6 +306,89 @@ void HttpBinaryCacheStore::getFile(const std::string & path, Callback HttpBinaryCacheStore::queryPathInfos( + const std::set & paths, + fun>>)> callback) +{ + using Result = std::pair>; + + checkEnabled(); + + /* Fall back to per-path queries unless the cache advertises an + endpoint for fetching multiple narinfos at once (the path to + POST to). */ + if (!getNarInfosV1) { + co_await Store::queryPathInfos(paths, std::move(callback)); + co_return; + } + + /* Check the client-side caches first and report the hits; collect + the misses to fetch from the server. */ + std::vector cached; + std::vector misses; + for (auto & path : paths) { + if (auto r = queryPathInfoFromClientCache(path)) + cached.emplace_back(path, *r); + else + misses.push_back(path); + } + if (!cached.empty()) + callback(std::move(cached)); + + if (misses.empty()) + co_return; + + /* Fetch the misses in a single request. `body` and `source` live + in the coroutine frame, so they stay alive across the + `co_await` while the transfer (which holds a raw pointer to + `source`) runs. */ + std::string body; + for (auto & path : misses) + body += std::string(path.hashPart()) + "\n"; + StringSource source{body}; + + auto request = makeRequest(*getNarInfosV1); + request.method = HttpMethod::Post; + request.data = {body.size(), source}; + request.mimeType = "text/plain"; + request.activityText = fmt("querying info on %d paths from '%s'", misses.size(), request.uri); + + FileTransferResult result; + try { + result = co_await callbackToAwaitable( + [&](Callback cb) { fileTransfer->enqueueFileTransfer(request, std::move(cb)); }); + } catch (FileTransferError &) { + maybeDisable(); + throw; + } + + /* Parse the narinfos (one JSON object per line), indexed by hash + part. */ + std::map, std::less<>> received; + for (auto & line : tokenizeString(result.data, "\n")) { + auto narInfo = std::make_shared(NarInfo::fromJSON(*this, nlohmann::json::parse(line))); + stats.narInfoRead++; + received.insert_or_assign(std::string(narInfo->path.hashPart()), std::move(narInfo)); + } + + /* Match each miss against the received narinfos, cache the result + (including negative entries), and report it. */ + auto cacheKey = config->getReference().render(/*FIXME withParams=*/false); + + std::vector results; + for (auto & path : misses) { + std::shared_ptr info; + if (auto i = received.find(path.hashPart()); + i != received.end() && (path.name() == MissingName || i->second->path == path)) + info = i->second; + if (diskCache) + diskCache->upsertNarInfo(cacheKey, std::string(path.hashPart()), info); + pathInfoCache->lock()->upsert(path, PathInfoCacheValue{.value = info}); + results.emplace_back(path, info); + } + callback(std::move(results)); +} + std::optional HttpBinaryCacheStore::getNixCacheInfo() { try { diff --git a/src/libstore/include/nix/store/binary-cache-store.hh b/src/libstore/include/nix/store/binary-cache-store.hh index 1f600d8fbc8f..13e603ffde51 100644 --- a/src/libstore/include/nix/store/binary-cache-store.hh +++ b/src/libstore/include/nix/store/binary-cache-store.hh @@ -84,6 +84,14 @@ struct alignas(8) /* Work around ASAN failures on i686-linux. */ */ Config & config; + /** + * The endpoint (relative to the cache URL) advertised by the + * cache's `nix-cache-info` `GetNarInfosV1` field for fetching + * multiple `.narinfo` files in one request, if any. Discovered at + * `init()` time. + */ + std::optional getNarInfosV1; + private: std::vector> signers; diff --git a/src/libstore/include/nix/store/filetransfer.hh b/src/libstore/include/nix/store/filetransfer.hh index 76309dff72fa..c48e26464f6d 100644 --- a/src/libstore/include/nix/store/filetransfer.hh +++ b/src/libstore/include/nix/store/filetransfer.hh @@ -184,6 +184,7 @@ struct FileTransferRequest HttpMethod method = HttpMethod::Get; unsigned int baseRetryTimeMs = RETRY_TIME_MS_DEFAULT; ActivityId parentAct; + std::optional activityText; bool decompress = true; /** diff --git a/src/libstore/include/nix/store/http-binary-cache-store.hh b/src/libstore/include/nix/store/http-binary-cache-store.hh index 475dc95104ab..f8392c61dd69 100644 --- a/src/libstore/include/nix/store/http-binary-cache-store.hh +++ b/src/libstore/include/nix/store/http-binary-cache-store.hh @@ -84,6 +84,10 @@ public: StorePaths topoSortPaths(const StorePathSet & paths) override; + asio::awaitable queryPathInfos( + const std::set & paths, + fun>>)> callback) override; + protected: std::optional getCompressionMethod(const std::string & path); diff --git a/src/libstore/include/nix/store/nar-info.hh b/src/libstore/include/nix/store/nar-info.hh index 7403dc1d4452..3194ed67590f 100644 --- a/src/libstore/include/nix/store/nar-info.hh +++ b/src/libstore/include/nix/store/nar-info.hh @@ -17,6 +17,17 @@ struct UnkeyedNarInfo : virtual UnkeyedValidPathInfo std::optional fileHash; uint64_t fileSize = 0; + /** + * An optional hint containing (some of) the indirect references + * of this path, i.e. a subset of the references closure of the + * path, excluding the path itself and its direct references. + * This allows substituters to start fetching the NAR info of the + * entire closure without waiting for intermediate narinfos. It + * is only a hint: it need not be complete and is not covered by + * the signature. + */ + StorePathSet partialClosure; + UnkeyedNarInfo(UnkeyedValidPathInfo info) : UnkeyedValidPathInfo(std::move(info)) { @@ -50,6 +61,19 @@ struct NarInfo : ValidPathInfo, UnkeyedNarInfo { } + /** + * Combine the unkeyed NAR info with its store path. Used to + * reconstruct a `NarInfo` from its JSON representation. + */ + NarInfo(UnkeyedNarInfo info, StorePath path) + : UnkeyedValidPathInfo{static_cast(info)} + /* As in `NarInfo(ValidPathInfo)`, only this most-derived + constructor initializes the virtual base. */ + , ValidPathInfo{std::move(path), static_cast(*this)} + , UnkeyedNarInfo{std::move(info)} + { + } + NarInfo(const StoreDirConfig & store, StorePath path, Hash narHash) : NarInfo{ValidPathInfo{std::move(path), UnkeyedValidPathInfo{store, narHash}}} { @@ -71,6 +95,21 @@ struct NarInfo : ValidPathInfo, UnkeyedNarInfo bool operator==(const NarInfo &) const = default; std::string to_string(const StoreDirConfig & store) const; + + using UnkeyedNarInfo::toJSON; + + /** + * Like `UnkeyedNarInfo::toJSON()`, but also includes the store + * path (as a `"path"` field), so the result is self-describing. + * Always uses the V2 JSON format. + */ + nlohmann::json toJSON(const StoreDirConfig & store, bool includeImpureInfo) const; + + /** + * Inverse of `toJSON()`: reconstruct a keyed `NarInfo` (including + * its store path) from JSON. Only the V2 format is supported. + */ + static NarInfo fromJSON(const StoreDirConfig & store, const nlohmann::json & json); }; } // namespace nix diff --git a/src/libstore/include/nix/store/path-info.hh b/src/libstore/include/nix/store/path-info.hh index 98f9ebe7aeda..bd4d53905343 100644 --- a/src/libstore/include/nix/store/path-info.hh +++ b/src/libstore/include/nix/store/path-info.hh @@ -43,6 +43,11 @@ struct SubstitutablePathInfo * 0 = unknown */ uint64_t narSize; + /** + * A hint containing (some of) the indirect references of this + * path, see `UnkeyedNarInfo::partialClosure`. + */ + StorePathSet partialClosure; }; using SubstitutablePathInfos = std::map; diff --git a/src/libstore/include/nix/store/store-api.hh b/src/libstore/include/nix/store/store-api.hh index e16e2c77d36d..02edb0a313cd 100644 --- a/src/libstore/include/nix/store/store-api.hh +++ b/src/libstore/include/nix/store/store-api.hh @@ -14,6 +14,8 @@ #include "nix/store/store-dir-config.hh" #include "nix/store/store-reference.hh" #include "nix/util/source-path.hh" +#include "nix/util/async.hh" +#include "nix/util/fun.hh" #include #include @@ -430,6 +432,19 @@ public: */ void queryPathInfo(const StorePath & path, Callback> callback) noexcept; + /** + * Asynchronously query information about multiple store paths. As + * results arrive (possibly in batches from a remote server), + * `callback` is invoked one or more times with a vector of + * `(path, info)` pairs. A null `info` denotes that the path is + * not valid. Every requested path is reported exactly once across + * all invocations of `callback`. Unlike `queryPathInfo()`, an + * invalid path is not an error. + */ + virtual asio::awaitable queryPathInfos( + const std::set & paths, + fun>>)> callback); + /** * Version of queryPathInfo() that only queries the local narinfo cache and not * the actual store. diff --git a/src/libstore/misc.cc b/src/libstore/misc.cc index c6d1a10a95a6..df748441d65e 100644 --- a/src/libstore/misc.cc +++ b/src/libstore/misc.cc @@ -139,6 +139,7 @@ querySubstitutablePathInfosAsync(Store & store, const StorePathCAMap & paths, Su .references = info->references, .downloadSize = narInfo ? narInfo->fileSize : 0, .narSize = info->narSize, + .partialClosure = narInfo ? narInfo->partialClosure : StorePathSet{}, }); break; /* We are done. */ @@ -157,6 +158,7 @@ querySubstitutablePathInfosAsync(Store & store, const StorePathCAMap & paths, Su }); } +// FIXME: remove this, queryMissing() no longer uses it. void Store::querySubstitutablePathInfos(const StorePathCAMap & paths, SubstitutablePathInfos & infos) { asio::io_context ctx; @@ -167,171 +169,217 @@ void Store::querySubstitutablePathInfos(const StorePathCAMap & paths, Substituta std::rethrow_exception(ex); } -static void collectDerivedPaths( - std::set & out, ref inputDrv, const DerivedPathMap::ChildNode & node) -{ - if (!node.value.empty()) - out.insert(DerivedPath::Built{inputDrv, node.value}); - for (const auto & [outputName, childNode] : node.childMap) - collectDerivedPaths( - out, make_ref(SingleDerivedPath::Built{inputDrv, outputName}), childNode); -} - MissingPaths Store::queryMissing(const std::vector & targets) { Activity act(*logger, lvlDebug, actUnknown, "querying info about missing paths"); MissingPaths res; - auto mustBuildDrv = [&](const StorePath & drvPath, const Derivation & drv, std::set & edges) { + auto collectDerivedPaths = [&](this auto & collectDerivedPaths, + std::set & out, + ref inputDrv, + const DerivedPathMap::ChildNode & node) -> void { + if (!node.value.empty()) + out.insert(DerivedPath::Built{inputDrv, node.value}); + for (const auto & [outputName, childNode] : node.childMap) + collectDerivedPaths( + out, make_ref(SingleDerivedPath::Built{inputDrv, outputName}), childNode); + }; + + auto mustBuildDrv = [&](const StorePath & drvPath, const Derivation & drv, std::set & out) { res.willBuild.insert(drvPath); for (const auto & [inputDrv, inputNode] : drv.inputDrvs.map) - collectDerivedPaths(edges, makeConstantStorePathRef(inputDrv), inputNode); + collectDerivedPaths(out, makeConstantStorePathRef(inputDrv), inputNode); }; - GetEdgesAsync getEdges = [&](const DerivedPath & req) -> asio::awaitable> { - std::set edges; - - co_await std::visit( - overloaded{ - [&](const DerivedPath::Built & bfd) -> asio::awaitable { - auto drvPathP = std::get_if(&*bfd.drvPath); - if (!drvPathP) { - // TODO make work in this case. - warn( - "Ignoring dynamic derivation %s while querying missing paths; not yet implemented", - bfd.drvPath->to_string(*this)); - co_return; - } - auto & drvPath = drvPathP->path; + asio::io_context ctx; + std::exception_ptr ex; - if (!isValidPath(drvPath)) { - // FIXME: we could try to substitute the derivation. - res.unknown.insert(drvPath); - co_return; - } + std::set done; - StorePathSet invalid; - /* true for regular derivations, and CA derivations for which we - have a trust mapping for all wanted outputs. */ - auto knownOutputPaths = true; - for (auto & [outputName, pathOpt] : queryPartialDerivationOutputMap(drvPath)) { - if (!pathOpt) { - knownOutputPaths = false; - break; - } - if (bfd.outputs.contains(outputName) && !isValidPath(*pathOpt)) - invalid.insert(*pathOpt); - } - if (knownOutputPaths && invalid.empty()) - co_return; - - auto drv = make_ref(derivationFromPath(drvPath)); - DerivationOptions drvOptions; - try { - // FIXME: this is a lot of work just to get the value - // of `allowSubstitutes`. - drvOptions = derivationOptionsFromStructuredAttrs( - *this, drv->inputDrvs, drv->env, get(drv->structuredAttrs)); - } catch (Error & e) { - e.addTrace({}, "while parsing derivation '%s'", printStorePath(drvPath)); - throw; - } + auto subs = getDefaultSubstituters(); - if (!knownOutputPaths && settings.getWorkerSettings().useSubstitutes - && drvOptions.substitutesAllowed(settings.getWorkerSettings())) { - experimentalFeatureSettings.require(Xp::CaDerivations); + std::function(std::set)> doPaths; + doPaths = [&](std::set paths) -> asio::awaitable { + debug("working on batch of %d paths", paths.size()); - // If there are unknown output paths, attempt to find if the - // paths are known to substituters through a realisation. - auto outputHashes = staticOutputHashes(*this, *drv); - knownOutputPaths = true; + std::set pathsToQuery; - for (auto [outputName, hash] : outputHashes) { - if (!bfd.outputs.contains(outputName)) - continue; + std::map>> outPathsToDrvs; - bool found = false; - for (auto & sub : getDefaultSubstituters()) { - /* TODO: Asyncify this. */ - auto realisation = sub->queryRealisation({hash, outputName}); - if (!realisation) - continue; - found = true; - if (!isValidPath(realisation->outPath)) - invalid.insert(realisation->outPath); - break; - } - if (!found) { - // Some paths did not have a realisation, this must be built. + while (!paths.empty()) { + auto p = *paths.begin(); + paths.erase(paths.begin()); + if (!done.insert(p).second) + continue; + std::visit( + overloaded{ + [&](const DerivedPath::Built & bfd) -> void { + auto drvPathP = std::get_if(&*bfd.drvPath); + if (!drvPathP) { + // TODO make work in this case. + warn( + "Ignoring dynamic derivation %s while querying missing paths; not yet implemented", + bfd.drvPath->to_string(*this)); + return; + } + auto & drvPath = drvPathP->path; + + if (!isValidPath(drvPath)) { + // FIXME: we could try to substitute the derivation. + res.unknown.insert(drvPath); + return; + } + + StorePathSet invalid; + /* true for regular derivations, and CA derivations for which we + have a trust mapping for all wanted outputs. */ + auto knownOutputPaths = true; + for (auto & [outputName, pathOpt] : queryPartialDerivationOutputMap(drvPath)) { + if (!pathOpt) { knownOutputPaths = false; break; } + if (bfd.outputs.contains(outputName) && !isValidPath(*pathOpt)) + invalid.insert(*pathOpt); + } + if (knownOutputPaths && invalid.empty()) + return; + + auto drv = make_ref(derivationFromPath(drvPath)); + DerivationOptions drvOptions; + try { + // FIXME: this is a lot of work just to get the value + // of `allowSubstitutes`. + drvOptions = derivationOptionsFromStructuredAttrs( + *this, drv->inputDrvs, drv->env, get(drv->structuredAttrs)); + } catch (Error & e) { + e.addTrace({}, "while parsing derivation '%s'", printStorePath(drvPath)); + throw; } - } - if (knownOutputPaths && settings.getWorkerSettings().useSubstitutes - && drvOptions.substitutesAllowed(settings.getWorkerSettings())) { - bool mustBuild = false; - StorePathSet substitutable; - auto * cap = getDerivationCA(*drv); - - /* Query all outputs concurrently (but not in parallel, - computeClosure runs on a strand). If any one is not - substitutable then discard all other outputs. */ - co_await forEachAsync(invalid, [&](const StorePath & outPath) -> asio::awaitable { - if (mustBuild) - co_return; - - SubstitutablePathInfos infos; - co_await querySubstitutablePathInfosAsync( - *this, {{outPath, cap ? std::optional{*cap} : std::nullopt}}, infos); - - if (infos.empty()) - mustBuild = true; - else - substitutable.insert(outPath); - }); + if (!knownOutputPaths && settings.getWorkerSettings().useSubstitutes + && drvOptions.substitutesAllowed(settings.getWorkerSettings())) { + experimentalFeatureSettings.require(Xp::CaDerivations); - if (mustBuild) - mustBuildDrv(drvPath, *drv, edges); - else - for (auto & path : substitutable) - edges.insert(DerivedPath::Opaque{path}); - } else { - mustBuildDrv(drvPath, *drv, edges); - } + // If there are unknown output paths, attempt to find if the + // paths are known to substituters through a realisation. + auto outputHashes = staticOutputHashes(*this, *drv); + knownOutputPaths = true; + + for (auto [outputName, hash] : outputHashes) { + if (!bfd.outputs.contains(outputName)) + continue; + + bool found = false; + for (auto & sub : getDefaultSubstituters()) { + /* TODO: Asyncify this. */ + auto realisation = sub->queryRealisation({hash, outputName}); + if (!realisation) + continue; + found = true; + if (!isValidPath(realisation->outPath)) + invalid.insert(realisation->outPath); + break; + } + if (!found) { + // Some paths did not have a realisation, this must be built. + knownOutputPaths = false; + break; + } + } + } + + if (knownOutputPaths && settings.getWorkerSettings().useSubstitutes + && drvOptions.substitutesAllowed(settings.getWorkerSettings()) && !subs.empty()) { + for (auto & p : invalid) { + pathsToQuery.insert(p); + outPathsToDrvs[p].insert_or_assign(drvPath, drv); + } + } else + mustBuildDrv(drvPath, *drv, paths); + }, + [&](const DerivedPath::Opaque & bo) -> void { + // FIXME: this should probably be an async call, but for a local store we probably don't want to + // bother. + if (!maybeQueryPathInfo(bo.path)) { + if (settings.getWorkerSettings().useSubstitutes && !subs.empty()) + pathsToQuery.insert(bo.path); + else + res.unknown.insert(bo.path); + } + }, }, - [&](const DerivedPath::Opaque & bo) -> asio::awaitable { - if (maybeQueryPathInfo(bo.path)) - co_return; + p.raw()); + } - SubstitutablePathInfos infos; - co_await querySubstitutablePathInfosAsync(*this, {{bo.path, std::nullopt}}, infos); + if (pathsToQuery.empty()) + co_return; - if (infos.empty()) { - res.unknown.insert(bo.path); - co_return; - } + auto executor = co_await asio::this_coro::executor; - auto info = infos.find(bo.path); - assert(info != infos.end()); - res.willSubstitute.insert(bo.path); - res.downloadSize += info->second.downloadSize; - res.narSize += info->second.narSize; + std::unordered_map negativeResultsPerPath; - for (auto & ref : info->second.references) - edges.insert(DerivedPath::Opaque{ref}); - }, - }, - req.raw()); + /* Query all substituters concurrently. FIXME: this may not be desirable. */ + co_await forEachAsync(subs, [&](const ref & sub) -> asio::awaitable { + debug("querying %d paths on '%s'", pathsToQuery.size(), sub->config.getHumanReadableURI()); + return sub->queryPathInfos( + pathsToQuery, [&](std::vector>> infos) { + debug("got %d paths from %s", infos.size(), sub->config.getHumanReadableURI()); + + std::set todo; + + for (auto & [path, info] : infos) { + if (info) { + if (!res.willSubstitute.insert(path).second) + continue; + res.willSubstitute.insert(path); + res.narSize += info->narSize; - co_return edges; + for (auto & ref : info->references) + todo.insert(DerivedPath::Opaque{ref}); + + if (auto narInfo = std::dynamic_pointer_cast(info)) { + res.downloadSize += narInfo->fileSize; + + /* Recurse into the partial closure hint as well, + so we don't have to wait for the narinfos of + the direct references to discover the rest of + the closure. */ + for (auto & ref : narInfo->partialClosure) + todo.insert(DerivedPath::Opaque{ref}); + } + } else { + if (++negativeResultsPerPath[path] == subs.size()) { + if (auto i = outPathsToDrvs.find(path); i != outPathsToDrvs.end()) { + for (auto & [drvPath, drv] : i->second) + mustBuildDrv(drvPath, *drv, todo); + } else + /* This path is not a derivation output, so there is no way to produce it. */ + res.unknown.insert(path); + } + } + } + + if (!todo.empty()) + asio::co_spawn(executor, std::bind(doPaths, todo), [&](std::exception_ptr e) { + if (e) + ex = e; + }); + }); + }); + + co_return; }; - std::set startElts(targets.begin(), targets.end()); - std::set visited; - computeClosure(std::move(startElts), visited, std::move(getEdges)); + asio::co_spawn( + ctx, std::bind(doPaths, std::set(targets.begin(), targets.end())), [&](std::exception_ptr e) { + ex = e; + }); + + ctx.run(); + if (ex) + std::rethrow_exception(ex); return res; } diff --git a/src/libstore/nar-info.cc b/src/libstore/nar-info.cc index 6f1c6b96cc54..68bb7f2c5432 100644 --- a/src/libstore/nar-info.cc +++ b/src/libstore/nar-info.cc @@ -75,6 +75,12 @@ NarInfo::NarInfo(const StoreDirConfig & store, const std::string & s, const std: throw corrupt("extra References"); for (auto & r : refs) references.insert(StorePath(r)); + } else if (name == "PartialClosure") { + auto refs = tokenizeString(value, " "); + if (!partialClosure.empty()) + throw corrupt("extra PartialClosure"); + for (auto & r : refs) + partialClosure.insert(StorePath(r)); } else if (name == "Deriver") { if (value != "unknown-deriver") deriver = StorePath(value); @@ -125,6 +131,13 @@ std::string NarInfo::to_string(const StoreDirConfig & store) const res += "References: " + concatStringsSep(" ", shortRefs()) + "\n"; + if (!partialClosure.empty()) { + Strings ss; + for (auto & p : partialClosure) + ss.push_back(std::string(p.to_string())); + res += "PartialClosure: " + concatStringsSep(" ", ss) + "\n"; + } + if (deriver) res += "Deriver: " + std::string(deriver->to_string()) + "\n"; @@ -160,6 +173,13 @@ UnkeyedNarInfo::toJSON(const StoreDirConfig * store, bool includeImpureInfo, Pat } if (fileSize) jsonObject["downloadSize"] = fileSize; + if (!partialClosure.empty()) { + auto & jsonPartialClosure = jsonObject["partialClosure"] = json::array(); + for (auto & p : partialClosure) + jsonPartialClosure.emplace_back( + format == PathInfoJsonFormat::V1 ? static_cast(store->printStorePath(p)) + : static_cast(p)); + } } return jsonObject; @@ -191,9 +211,43 @@ UnkeyedNarInfo UnkeyedNarInfo::fromJSON(const StoreDirConfig * store, const nloh if (auto * downloadSize = get(obj, "downloadSize")) res.fileSize = getUnsigned(*downloadSize); + if (auto * partialClosure = get(obj, "partialClosure")) { + try { + for (auto & input : getArray(*partialClosure)) + res.partialClosure.insert( + format == PathInfoJsonFormat::V1 ? store->parseStorePath(getString(input)) + : static_cast(input)); + } catch (Error & e) { + e.addTrace({}, "while reading key 'partialClosure'"); + throw; + } + } + return res; } +nlohmann::json NarInfo::toJSON(const StoreDirConfig & store, bool includeImpureInfo) const +{ + auto jsonObject = UnkeyedNarInfo::toJSON(&store, includeImpureInfo, PathInfoJsonFormat::V2); + jsonObject["path"] = path; + return jsonObject; +} + +NarInfo NarInfo::fromJSON(const StoreDirConfig & store, const nlohmann::json & json) +{ + auto & obj = getObject(json); + + PathInfoJsonFormat format = PathInfoJsonFormat::V1; + if (auto * version = optionalValueAt(obj, "version")) + format = *version; + if (format != PathInfoJsonFormat::V2) + throw Error("NAR info JSON must use format version 2"); + + auto path = static_cast(valueAt(obj, "path")); + + return NarInfo{UnkeyedNarInfo::fromJSON(&store, json), std::move(path)}; +} + } // namespace nix namespace nlohmann { diff --git a/src/libstore/store-api.cc b/src/libstore/store-api.cc index e53e741acb4b..b562f9232d44 100644 --- a/src/libstore/store-api.cc +++ b/src/libstore/store-api.cc @@ -613,6 +613,25 @@ void Store::queryPathInfo(const StorePath & storePath, Callback Store::queryPathInfos( + const std::set & paths, + fun>>)> callback) +{ + /* Default implementation: query each path individually, reporting + each result as it arrives. */ + co_await forEachAsync(paths, [&](const StorePath & path) -> asio::awaitable { + std::shared_ptr info; + try { + auto i = co_await callbackToAwaitable>( + [&](Callback> cb) { queryPathInfo(path, std::move(cb)); }); + info = i.get_ptr(); + } catch (InvalidPath &) { + } + std::vector>> result{{path, info}}; + callback(std::move(result)); + }); +} + void Store::queryRealisation( const DrvOutput & id, Callback> callback) noexcept { diff --git a/src/nix/serve.cc b/src/nix/serve.cc index 7206411fd3da..7c14e22b1c00 100644 --- a/src/nix/serve.cc +++ b/src/nix/serve.cc @@ -3,7 +3,10 @@ #include "nix/util/serialise.hh" #include "nix/util/signals.hh" #include "nix/util/deleter.hh" +#include "nix/util/strings.hh" #include "nix/store/nar-info.hh" + +#include #include "nix/store/binary-cache-store.hh" #include "nix/store/log-store.hh" #include "nix/util/environment-variables.hh" @@ -18,6 +21,36 @@ using namespace nix; using Response = std::unique_ptr>; +/** + * Render the narinfo for `info`, including a `PartialClosure` hint. + * Paths in `alreadySent` are omitted from the hint, since the hint + * only serves to let the client discover paths to traverse, so + * there is no need to send a path more than once in the same HTTP + * call. The references and hint of this narinfo are added to + * `alreadySent` in turn. + */ +static NarInfo makeNarInfo(Store & store, const ValidPathInfo & info, StorePathSet & alreadySent) +{ + NarInfo ni(info); + ni.compression = "none"; + StorePathSet closure; + try { + store.computeFSClosure(info.path, closure); + } catch (InvalidPath &) { + } + std::erase_if(closure, [&](const StorePath & p) { + return p == info.path || info.references.contains(p) || alreadySent.contains(p); + }); + alreadySent.insert(info.references.begin(), info.references.end()); + alreadySent.insert(closure.begin(), closure.end()); + ni.partialClosure = std::move(closure); + // FIXME: would be nicer to use just the NAR hash, but we can't look up NARs by NAR hash. + ni.url = + "nar/" + std::string(info.path.hashPart()) + "-" + info.narHash.to_string(HashFormat::Nix32, false) + ".nar"; + ni.fileSize = info.narSize; + return ni; +} + struct CmdServe : StoreCommand { uint16_t port = 8080; @@ -72,8 +105,12 @@ struct CmdServe : StoreCommand ; } - MHD_Result - handleRequest(Store & store, MHD_Connection * connection, const std::string & url, std::string_view method) + MHD_Result handleRequest( + Store & store, + MHD_Connection * connection, + const std::string & url, + std::string_view method, + std::string_view body) try { std::string clientAddr = "unknown"; if (auto * info = MHD_get_connection_info(connection, MHD_CONNECTION_INFO_CLIENT_ADDRESS)) { @@ -99,18 +136,25 @@ struct CmdServe : StoreCommand static const std::regex narUrlRegex{R"(^/nar/([0-9a-z]+)-([0-9a-z]+)\.nar$)"}; static const std::regex logUrlRegex{R"(^/log/([0-9a-z]+-[0-9a-zA-Z+\-._?=]+)$)"}; - if (method != MHD_HTTP_METHOD_GET && method != MHD_HTTP_METHOD_HEAD) { - std::string_view body = "405 method not allowed\n"; + auto methodNotAllowed = [&](const char * allow) { + static constexpr std::string_view body = "405 method not allowed\n"; response.reset(MHD_create_response_from_buffer(body.size(), (void *) body.data(), MHD_RESPMEM_PERSISTENT)); - MHD_add_response_header(response.get(), "Allow", "GET, HEAD"); + MHD_add_response_header(response.get(), "Allow", allow); return MHD_queue_response(connection, MHD_HTTP_METHOD_NOT_ALLOWED, response.get()); - } + }; + + if (url == "/get-narinfos-v1") { + if (method != MHD_HTTP_METHOD_POST) + return methodNotAllowed("POST"); + } else if (method != MHD_HTTP_METHOD_GET && method != MHD_HTTP_METHOD_HEAD) + return methodNotAllowed("GET, HEAD"); if (url == "/nix-cache-info") { auto body = std::make_unique( "StoreDir: " + store.storeDir + "\n" "WantMassQuery: " + (store.config.wantMassQuery ? "1" : "0") + "\n" - "Priority: " + std::to_string(priority.value_or(store.config.priority)) + "\n"); + "Priority: " + std::to_string(priority.value_or(store.config.priority)) + "\n" + "GetNarInfosV1: /get-narinfos-v1\n"); response.reset(MHD_create_response_from_buffer(body->size(), body->data(), MHD_RESPMEM_MUST_COPY)); MHD_add_response_header(response.get(), "Content-Type", "text/x-nix-cache-info"); @@ -121,16 +165,38 @@ struct CmdServe : StoreCommand return notFound(); auto info = store.queryPathInfo(*path); - NarInfo ni(*info); - ni.compression = "none"; - // FIXME: would be nicer to use just the NAR hash, but we can't look up NARs by NAR hash. - ni.url = "nar/" + std::string(info->path.hashPart()) + "-" - + info->narHash.to_string(HashFormat::Nix32, false) + ".nar"; - ni.fileSize = info->narSize; - auto body = ni.to_string(store); + StorePathSet alreadySent; + auto body = makeNarInfo(store, *info, alreadySent).to_string(store); response.reset(MHD_create_response_from_buffer(body.size(), body.data(), MHD_RESPMEM_MUST_COPY)); MHD_add_response_header(response.get(), "Content-Type", "text/x-nix-narinfo"); + } else if (url == "/get-narinfos-v1") { + /* Resolve all requested hash parts first, so that the + queried paths can be excluded from each other's + `PartialClosure` hint. */ + StorePathSet queried; + for (auto & hashPart : tokenizeString(std::string(body), "\n\r \t")) { + if (auto path = store.queryPathFromHashPart(hashPart)) + queried.insert(*path); + } + + /* Return the narinfo of each valid path as a JSON object, + one per line (newline-delimited JSON). The absence of a + path from the response denotes that it is invalid. */ + auto alreadySent = queried; + std::string res; + for (auto & path : queried) { + try { + auto info = store.queryPathInfo(path); + res += makeNarInfo(store, *info, alreadySent).toJSON(store, true).dump(); + res += "\n"; + } catch (InvalidPath &) { + } + } + + response.reset(MHD_create_response_from_buffer(res.size(), res.data(), MHD_RESPMEM_MUST_COPY)); + MHD_add_response_header(response.get(), "Content-Type", "application/x-ndjson"); + } else if (std::smatch m; std::regex_match(url, m, narUrlRegex)) { auto hashPart = m[1].str(); auto expectedNarHash = m[2].str(); @@ -217,9 +283,9 @@ struct CmdServe : StoreCommand return MHD_queue_response(connection, MHD_HTTP_OK, response.get()); } catch (const Error & e) { - auto body = fmt("500 Internal Server Error\n\nError: %s", e.message()); + auto errorBody = fmt("500 Internal Server Error\n\nError: %s", e.message()); Response response; - response.reset(MHD_create_response_from_buffer(body.size(), body.data(), MHD_RESPMEM_MUST_COPY)); + response.reset(MHD_create_response_from_buffer(errorBody.size(), errorBody.data(), MHD_RESPMEM_MUST_COPY)); MHD_add_response_header(response.get(), "Content-Type", "text/plain"); return MHD_queue_response(connection, MHD_HTTP_INTERNAL_SERVER_ERROR, response.get()); } @@ -234,6 +300,13 @@ struct CmdServe : StoreCommand Ctx ctx{*store, *this}; + /* Per-request state for accumulating the request body, since + microhttpd invokes the handler multiple times per request. */ + struct RequestState + { + std::string body; + }; + auto handler = [](void * cls, MHD_Connection * connection, const char * url, @@ -245,9 +318,25 @@ struct CmdServe : StoreCommand auto & ctx = *static_cast(cls); auto & store = ctx.store; auto & cmd = ctx.cmd; - return cmd.handleRequest(store, connection, std::string(url), method); + if (!*con_cls) { + *con_cls = new RequestState; + return MHD_YES; + } + auto & reqState = *static_cast(*con_cls); + if (*upload_data_size) { + reqState.body.append(upload_data, *upload_data_size); + *upload_data_size = 0; + return MHD_YES; + } + return cmd.handleRequest(store, connection, std::string(url), method, reqState.body); }; + auto requestCompleted = + [](void * cls, MHD_Connection * connection, void ** con_cls, MHD_RequestTerminationCode toe) { + delete static_cast(*con_cls); + *con_cls = nullptr; + }; + sockaddr_in addr4{}; sockaddr_in6 addr6{}; const sockaddr * sockAddr = nullptr; @@ -266,7 +355,18 @@ struct CmdServe : StoreCommand throw Error("invalid listen address '%s'", listenAddress); auto * daemon = MHD_start_daemon( - flags, port, nullptr, nullptr, handler, &ctx, MHD_OPTION_SOCK_ADDR, sockAddr, MHD_OPTION_END); + flags, + port, + nullptr, + nullptr, + handler, + &ctx, + MHD_OPTION_SOCK_ADDR, + sockAddr, + MHD_OPTION_NOTIFY_COMPLETED, + static_cast(requestCompleted), + nullptr, + MHD_OPTION_END); if (!daemon) throw Error("failed to start HTTP daemon on %s:%d", listenAddress, port); diff --git a/tests/functional/binary-cache.sh b/tests/functional/binary-cache.sh index 6e8f66f506fc..546456299f91 100755 --- a/tests/functional/binary-cache.sh +++ b/tests/functional/binary-cache.sh @@ -244,7 +244,6 @@ clearCacheCache restartNixServe nix-build --substituters "$httpBinaryCacheUrl" --no-require-sigs dependencies.nix -o "$TEST_ROOT/result" 2>&1 | tee "$TEST_ROOT/log" -grepQuiet "don't know how to build" "$TEST_ROOT/log" grepQuiet "building.*input-1" "$TEST_ROOT/log" grepQuiet "building.*input-2" "$TEST_ROOT/log"