From e940a293816f8e2a1fb0abff54bccaab61eb0520 Mon Sep 17 00:00:00 2001
From: Sam Goyal
Date: Wed, 10 Jun 2026 14:47:22 -0700
Subject: [PATCH] src/clyso/ceph/ai/report: add a check for affected osds for data corruption bug

---
 otto/src/clyso/ceph/ai/report.py | 70 ++++++++++++++++++++++++++++++++
 tests/otto.json                  | 17 ++++++--
 2 files changed, 83 insertions(+), 4 deletions(-)

diff --git a/otto/src/clyso/ceph/ai/report.py b/otto/src/clyso/ceph/ai/report.py
index 37e589c..2a7daf1 100644
--- a/otto/src/clyso/ceph/ai/report.py
+++ b/otto/src/clyso/ceph/ai/report.py
@@ -139,6 +139,76 @@ def _handle_non_recommended_version(result, ver, rec_versions, rec_minor) -> Non
     result.add_check_result("Version", "Release", "WARN", summary, detail, recommend)
 
 
+@add_check
+def check_elastic_shared_blob_affected_osds(result: AIResult, data: CephData) -> None:
+    """Flag OSDs created by a release affected by the Elastic Shared Blob bug.
+
+    Reads ``ceph_version_when_created`` from every entry of the report's OSD
+    metadata and records a single "Elastic Shared Blob Bug" result in the
+    "OSD Health" section: WARN, listing each OSD created by 19.2.0 through
+    19.2.3 or by 20.2.0, or PASS when there is none. Nothing is recorded when
+    no ceph report is loaded.
+
+    OSDs with no recorded creation version, or with one that cannot be parsed,
+    are skipped, so PASS only speaks for OSDs whose creation version is known.
+    """
+    if data.ceph_report is None:
+        return
+
+    section = "OSD Health"
+    check = "Elastic Shared Blob Bug"
+
+    # Parse the bounds once up front rather than once per OSD.
+    squid_first_affected = version.parse("19.2.0")
+    squid_first_fixed = version.parse("19.2.4")
+    tentacle_affected = version.parse("20.2.0")
+
+    affected_osds = []
+    for osd in data.ceph_report.osd_metadata:
+        created_ver_str = getattr(osd, "ceph_version_when_created", None)
+        if not created_ver_str:
+            continue
+        try:
+            # The release number is taken to be the third whitespace-separated
+            # token, e.g. "ceph version 19.2.0 (sha) squid (stable)".
+            created_ver = version.parse(created_ver_str.split()[2])
+        except (AttributeError, IndexError, ValueError):
+            # Best effort: skip values that are not strings, are too short, or
+            # are rejected by the parser (assumes `version` is packaging.version,
+            # whose InvalidVersion is a ValueError).
+            continue
+        if (
+            squid_first_affected <= created_ver < squid_first_fixed
+            or created_ver == tentacle_affected
+        ):
+            affected_osds.append(f"osd.{osd.id}")
+
+    if not affected_osds:
+        passfail = "PASS"
+        summary = "No OSDs created during versions affected by Elastic Shared Blob bug"
+        detail = [
+            "No OSDs were created during Squid 19.2.0–19.2.3 or Tentacle 20.2.0, which are affected by "
+            "the Elastic Shared Blob (ESB) bug (https://tracker.ceph.com/issues/70390)."
+        ]
+        recommend = []
+    else:
+        passfail = "WARN"
+        summary = f"{len(affected_osds)} OSD(s) may be affected by the Elastic Shared Blob bug"
+        detail = [
+            "The following OSD(s) were created during an affected version (Squid 19.2.0–19.2.3 or "
+            "Tentacle 20.2.0) and may be affected by the Elastic Shared Blob (ESB) bug "
+            f"(https://tracker.ceph.com/issues/70390): {', '.join(affected_osds)}. "
+            "This bug can cause OSD crashes and potential data corruption. "
+            "Note that v19.2.4 and v20.2.1 prevent new corruptions but cannot detect or repair existing ones."
+        ]
+        recommend = [
+            'Run "ceph config set osd bluestore_elastic_shared_blobs 0" to prevent new corruptions. '
+            "OSDs created during affected versions should be completely recreated to ensure data integrity."
+        ]
+
+    result.add_check_result(section, check, passfail, summary, detail, recommend)
+
+
 @add_check
 def check_report_known_bugs(result: AIResult, data: CephData) -> None:
     if data.ceph_report is None:
diff --git a/tests/otto.json b/tests/otto.json
index 13bd896..ce4942d 100644
--- a/tests/otto.json
+++ b/tests/otto.json
@@ -1,8 +1,8 @@
 {
   "summary": {
-    "score": 26.0,
+    "score": 27.0,
     "grade": "F",
-    "max_score": 33
+    "max_score": 34
   },
   "sections": [
     {
@@ -319,8 +319,8 @@
     },
     {
       "id": "OSD Health",
-      "score": 10.0,
-      "max_score": 13,
+      "score": 11.0,
+      "max_score": 14,
       "summary": "",
       "info": [
         {
@@ -330,6 +330,15 @@
         }
       ],
       "checks": [
+        {
+          "id": "Elastic Shared Blob Bug",
+          "result": "PASS",
+          "summary": "No OSDs created during versions affected by Elastic Shared Blob bug",
+          "detail": [
+            "No OSDs were created during Squid 19.2.0–19.2.3 or Tentacle 20.2.0, which are affected by the Elastic Shared Blob (ESB) bug (https://tracker.ceph.com/issues/70390)."
+          ],
+          "recommend": []
+        },
         {
           "id": "Check osdmap flags",
           "result": "PASS",