From d21adbb0f64b16158a54aea3b6c19b0e51f046ff Mon Sep 17 00:00:00 2001 From: Anil Vaitla Date: Tue, 2 Jun 2026 16:55:03 -0500 Subject: [PATCH 1/2] table_diff: report which input has duplicate primary keys The duplicate-key check raised a single generic "duplicate primary key values found in input" error regardless of which side was at fault. Split it into left/right/both cases that name the offending input and echo the relation argument, so users can immediately tell which table to fix. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/duck_diff_extension.cpp | 19 +++++++++++++++---- test/sql/table_diff_errors.test | 13 +++++++++++-- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/src/duck_diff_extension.cpp b/src/duck_diff_extension.cpp index 0e1cce0..c9a1d4e 100644 --- a/src/duck_diff_extension.cpp +++ b/src/duck_diff_extension.cpp @@ -543,15 +543,26 @@ string BuildDiffSQL(const DiffPlan &plan, const string &status_col, const string } } - string dup_cond = "EXISTS (SELECT 1 FROM __l GROUP BY " + key_list + " HAVING count(*) > 1) OR " + - "EXISTS (SELECT 1 FROM __r GROUP BY " + key_list + " HAVING count(*) > 1)"; + // Detect duplicate keys per side so the error can name which input is at + // fault (and echo the relation text). The relation strings are embedded in + // the message via QuoteLiteral so any quotes/specials are escaped. + string dup_left = "EXISTS (SELECT 1 FROM __l GROUP BY " + key_list + " HAVING count(*) > 1)"; + string dup_right = "EXISTS (SELECT 1 FROM __r GROUP BY " + key_list + " HAVING count(*) > 1)"; + string msg_both = QuoteLiteral("table_diff: duplicate primary key values found in both the left (" + plan.left + + ") and right (" + plan.right + ") inputs"); + string msg_left = QuoteLiteral("table_diff: duplicate primary key values found in the left input (" + plan.left + + ")"); + string msg_right = + QuoteLiteral("table_diff: duplicate primary key values found in the right input (" + plan.right + ")"); + string dup_check = "CASE WHEN " + dup_left + " AND " + dup_right + " THEN error(" + msg_both + ") WHEN " + + dup_left + " THEN error(" + msg_left + ") WHEN " + dup_right + " THEN error(" + msg_right + + ") END"; string sql = "WITH __l AS (SELECT __t.*, TRUE AS __p FROM (" + plan.left + ") __t), " + "__r AS (SELECT __t.*, TRUE AS __p FROM (" + plan.right + ") __t) " + "SELECT " + key_select + "CASE WHEN r.__p IS NULL THEN 'left_only' WHEN l.__p IS NULL THEN 'right_only' WHEN " + all_eq + " THEN 'identical' ELSE 'different' END AS " + QuoteIdent(status_col) + middle_select + - " FROM __l AS l FULL OUTER JOIN __r AS r ON " + join_cond + " WHERE (CASE WHEN " + dup_cond + - " THEN error('table_diff: duplicate primary key values found in input') END) IS NULL"; + " FROM __l AS l FULL OUTER JOIN __r AS r ON " + join_cond + " WHERE (" + dup_check + ") IS NULL"; return sql; } diff --git a/test/sql/table_diff_errors.test b/test/sql/table_diff_errors.test index d0695de..f710be4 100644 --- a/test/sql/table_diff_errors.test +++ b/test/sql/table_diff_errors.test @@ -50,7 +50,7 @@ INSERT INTO dr VALUES (1, 1); statement error SELECT * FROM table_diff('FROM dl', 'FROM dr', pk := 'id'); ---- -duplicate +duplicate primary key values found in the left input (FROM dl) # duplicate on the right side is also an error statement ok @@ -62,7 +62,16 @@ INSERT INTO dr VALUES (1, 9); statement error SELECT * FROM table_diff('FROM dl', 'FROM dr', pk := 'id'); ---- -duplicate +duplicate primary key values found in the right input (FROM dr) + +# duplicates on both sides are reported together +statement ok +INSERT INTO dl VALUES (1, 7); + +statement error +SELECT * FROM table_diff('FROM dl', 'FROM dr', pk := 'id'); +---- +duplicate primary key values found in both the left (FROM dl) and right (FROM dr) inputs # ---------------------------------------------------------------------------- # Relation arguments are queries: a bare relation name is rejected, the explicit From 81fbd1705681772d475dcfdc588352cdf0a43792 Mon Sep 17 00:00:00 2001 From: Anil Vaitla Date: Thu, 4 Jun 2026 09:56:36 -0500 Subject: [PATCH 2/2] style: clang-format src/duck_diff_extension.cpp (format-check) Co-Authored-By: Claude Opus 4.8 (1M context) --- src/duck_diff_extension.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/duck_diff_extension.cpp b/src/duck_diff_extension.cpp index c9a1d4e..24b9a39 100644 --- a/src/duck_diff_extension.cpp +++ b/src/duck_diff_extension.cpp @@ -550,8 +550,8 @@ string BuildDiffSQL(const DiffPlan &plan, const string &status_col, const string string dup_right = "EXISTS (SELECT 1 FROM __r GROUP BY " + key_list + " HAVING count(*) > 1)"; string msg_both = QuoteLiteral("table_diff: duplicate primary key values found in both the left (" + plan.left + ") and right (" + plan.right + ") inputs"); - string msg_left = QuoteLiteral("table_diff: duplicate primary key values found in the left input (" + plan.left + - ")"); + string msg_left = + QuoteLiteral("table_diff: duplicate primary key values found in the left input (" + plan.left + ")"); string msg_right = QuoteLiteral("table_diff: duplicate primary key values found in the right input (" + plan.right + ")"); string dup_check = "CASE WHEN " + dup_left + " AND " + dup_right + " THEN error(" + msg_both + ") WHEN " +