From 5cbcdaee47cb8b8005a5741b85246cc7359bdb5e Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Mon, 25 May 2026 00:51:29 +0200 Subject: [PATCH 1/4] fix a corner case when segments get deleted while reader reads outdated toc file --- .../index/internal/index_reader_worker.h | 18 ++++++-- .../keyvi/index/read_only_index_test.cpp | 43 +++++++++++++++++++ 2 files changed, 57 insertions(+), 4 deletions(-) diff --git a/keyvi/include/keyvi/index/internal/index_reader_worker.h b/keyvi/include/keyvi/index/internal/index_reader_worker.h index 750f3b418..d7b92b738 100644 --- a/keyvi/include/keyvi/index/internal/index_reader_worker.h +++ b/keyvi/include/keyvi/index/internal/index_reader_worker.h @@ -189,12 +189,22 @@ class IndexReaderWorker final { } void UpdateWatcher() { + int retries_left = 3; while (!stop_update_thread_) { TRACE("UpdateWatcher: Check for new segments"); - // reload - ReloadIndex(); - ReloadDeletedKeys(); - // sleep for next refresh + try { + ReloadIndex(); + ReloadDeletedKeys(); + retries_left = 0; + } catch (const std::exception& ex) { + TRACE("UpdateWatcher: reload failed: %s, retries left: %d", ex.what(), retries_left); + last_modification_time_ = 0; + if (retries_left > 0) { + --retries_left; + continue; + } + retries_left = 3; + } std::this_thread::sleep_for(refresh_interval_); } } diff --git a/keyvi/tests/keyvi/index/read_only_index_test.cpp b/keyvi/tests/keyvi/index/read_only_index_test.cpp index eb10782d7..4cd5cb1bb 100644 --- a/keyvi/tests/keyvi/index/read_only_index_test.cpp +++ b/keyvi/tests/keyvi/index/read_only_index_test.cpp @@ -298,6 +298,49 @@ BOOST_AUTO_TEST_CASE(nearMatching) { {"\"pizzeria in Munich 4\"", "\"pizzeria in Munich 1\""}); } +BOOST_AUTO_TEST_CASE(reloadRecoveryAfterMissingSegment) { + testing::IndexMock index; + + std::vector> test_data = { + {"abc", "{a:1}"}, {"def", "{b:2}"}, + }; + index.AddSegment(&test_data); + + ReadOnlyIndex reader(index.GetIndexFolder(), {{"refresh_interval", "100"}});
BOOST_CHECK(reader.Contains("abc")); + BOOST_CHECK(reader.Contains("def")); + + // simulate the race condition: write a TOC referencing a non-existent segment + std::this_thread::sleep_for(std::chrono::seconds(1)); + { + boost::filesystem::path toc_file(index.GetIndexFolder()); + toc_file /= "index.toc"; + std::ofstream toc(toc_file.string()); + toc << R"({"files": ["kv-0.kv", "kv-nonexistent.kv"]})"; + } + + // wait for a few reload cycles — the reader should not crash + std::this_thread::sleep_for(std::chrono::milliseconds(500)); + + // the reader should still serve the old segments + BOOST_CHECK(reader.Contains("abc")); + BOOST_CHECK(reader.Contains("def")); + + // now fix the TOC by adding a real new segment + std::this_thread::sleep_for(std::chrono::seconds(1)); + std::vector> test_data_2 = { + {"ghi", "{c:3}"}, + }; + index.AddSegment(&test_data_2); + + // wait for reload to pick up the fixed TOC + std::this_thread::sleep_for(std::chrono::milliseconds(500)); + + BOOST_CHECK(reader.Contains("abc")); + BOOST_CHECK(reader.Contains("def")); + BOOST_CHECK(reader.Contains("ghi")); +} + BOOST_AUTO_TEST_SUITE_END() } /* namespace index */ From 29aaadc62972af29efb969168a2fb8545199459a Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Mon, 25 May 2026 01:01:56 +0200 Subject: [PATCH 2/4] pre-commit fixes --- keyvi/tests/keyvi/index/read_only_index_test.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/keyvi/tests/keyvi/index/read_only_index_test.cpp b/keyvi/tests/keyvi/index/read_only_index_test.cpp index 4cd5cb1bb..bdaa4c19c 100644 --- a/keyvi/tests/keyvi/index/read_only_index_test.cpp +++ b/keyvi/tests/keyvi/index/read_only_index_test.cpp @@ -23,7 +23,9 @@ * Author: hendrik */ #include //NOLINT +#include #include //NOLINT +#include #include #include @@ -302,7 +304,8 @@ BOOST_AUTO_TEST_CASE(reloadRecoveryAfterMissingSegment) { testing::IndexMock index; std::vector> test_data = { - {"abc", "{a:1}"}, {"def", "{b:2}"}, + {"abc", "{a:1}"}, + {"def", 
"{b:2}"}, }; index.AddSegment(&test_data); From 5e2389d88943d75bb758acba9915f30efd33f27f Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Mon, 25 May 2026 01:04:53 +0200 Subject: [PATCH 3/4] clang tidy --- keyvi/include/keyvi/index/internal/index_reader_worker.h | 1 + 1 file changed, 1 insertion(+) diff --git a/keyvi/include/keyvi/index/internal/index_reader_worker.h b/keyvi/include/keyvi/index/internal/index_reader_worker.h index d7b92b738..508bcc683 100644 --- a/keyvi/include/keyvi/index/internal/index_reader_worker.h +++ b/keyvi/include/keyvi/index/internal/index_reader_worker.h @@ -30,6 +30,7 @@ #include #include //NOLINT #include +#include #include #include //NOLINT #include From 92b26582fdaf7cbac4f1458e2e6e27cb75d07415 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Mon, 25 May 2026 02:08:10 +0200 Subject: [PATCH 4/4] avoid assertion on mac --- keyvi/include/keyvi/index/internal/index_reader_worker.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/keyvi/include/keyvi/index/internal/index_reader_worker.h b/keyvi/include/keyvi/index/internal/index_reader_worker.h index 508bcc683..c5cae2ccc 100644 --- a/keyvi/include/keyvi/index/internal/index_reader_worker.h +++ b/keyvi/include/keyvi/index/internal/index_reader_worker.h @@ -153,6 +153,11 @@ class IndexReaderWorker final { index_toc.ParseStream(isw); TRACE("index_toc loaded"); + if (index_toc.HasParseError() || !index_toc.IsObject() || !index_toc.HasMember("files") || + !index_toc["files"].IsArray()) { + throw std::invalid_argument("invalid toc file"); + } + TRACE("reading segments"); read_only_segments_t new_segments = std::make_shared();