diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index 527a412..c415fd5 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -18,7 +18,7 @@ jobs: with: duckdb_version: v1.5.3 ci_tools_version: v1.5-variegata - extension_name: duck_diff + extension_name: table_diff code-quality-check: name: Code Quality Check @@ -26,5 +26,5 @@ jobs: with: duckdb_version: v1.5.3 ci_tools_version: v1.5-variegata - extension_name: duck_diff + extension_name: table_diff format_checks: 'format;tidy' diff --git a/CMakeLists.txt b/CMakeLists.txt index 236ad57..e6678b6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.5) # Set extension name here -set(TARGET_NAME duck_diff) +set(TARGET_NAME table_diff) set(EXTENSION_NAME ${TARGET_NAME}_extension) set(LOADABLE_EXTENSION_NAME ${TARGET_NAME}_loadable_extension) @@ -13,7 +13,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) include_directories(src/include) -set(EXTENSION_SOURCES src/duck_diff_extension.cpp) +set(EXTENSION_SOURCES src/table_diff_extension.cpp) build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES}) build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES}) diff --git a/Makefile b/Makefile index e7e0315..bb323e8 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ PROJ_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) # Configuration of extension -EXT_NAME=duck_diff +EXT_NAME=table_diff EXT_CONFIG=${PROJ_DIR}extension_config.cmake # Include the Makefile from extension-ci-tools diff --git a/README.md b/README.md index 0112bb7..e683ee1 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# duck_diff +# table_diff A DuckDB extension for diffing two relations (tables, SQL queries, etc.) off a primary key. Given a "left" and a "right" relation, it reports — per key — whether the row is @@ -10,7 +10,7 @@ subset of columns to diff or ignore. 
## Quick start -Get a DuckDB shell with `duck_diff` loaded (see [Install](#install) or +Get a DuckDB shell with `table_diff` loaded (see [Install](#install) or [Building](#building)), then create two sample snapshots and diff them: ```sql @@ -69,23 +69,23 @@ FROM table_diff_summary('FROM users_v1', 'FROM users_v2', pk := 'id'); -- fals ## Install -Each [GitHub Release](https://github.com/avaitla/duck_diff/releases) attaches a +Each [GitHub Release](https://github.com/avaitla/duckdb-table-diff/releases) attaches a signed binary per platform. Download the one matching your DuckDB version and -platform, **saved as `duck_diff.duckdb_extension`** (DuckDB derives the +platform, **saved as `table_diff.duckdb_extension`** (DuckDB derives the extension name from the filename), then load it under `-unsigned` (the binaries are signed with a third-party key, so unsigned extensions must be enabled — a launch flag, not a `SET`): ```sh -curl -L -o duck_diff.duckdb_extension \ - https://github.com/avaitla/duck_diff/releases/download/v0.1.0/duck_diff-v1.5.2-osx_arm64.duckdb_extension +curl -L -o table_diff.duckdb_extension \ + https://github.com/avaitla/duckdb-table-diff/releases/download/v0.1.0/table_diff-v1.5.2-osx_arm64.duckdb_extension duckdb -unsigned ``` Load it with the full filepath: ```sql -LOAD '/path/to/duck_diff.duckdb_extension'; +LOAD '/path/to/table_diff.duckdb_extension'; SELECT * FROM table_diff('FROM a', 'FROM b', pk := 'id'); ``` @@ -200,13 +200,13 @@ single query you can also force one shared scan with a `WITH x AS MATERIALIZED ## Building The repo vendors DuckDB and the build tooling as submodules, so a clone + -`make` produces a DuckDB shell with `duck_diff` preloaded: +`make` produces a DuckDB shell with `table_diff` preloaded: ```sh -git clone --recurse-submodules https://github.com/avaitla/duck_diff -cd duck_diff +git clone --recurse-submodules https://github.com/avaitla/duckdb-table-diff +cd duckdb-table-diff GEN=ninja make # first build compiles DuckDB; needs 
cmake + ninja -./build/release/duckdb # this shell already has duck_diff loaded +./build/release/duckdb # this shell already has table_diff loaded build/release/test/unittest "test/sql/*" # run the SQL test suite ``` @@ -218,26 +218,26 @@ bundled `json` extension is required (built in automatically for tests). ### Using it in another DuckDB The build also emits a loadable binary at -`build/release/extension/duck_diff/duck_diff.duckdb_extension`. It's locally +`build/release/extension/table_diff/table_diff.duckdb_extension`. It's locally built (unsigned), so load it with unsigned extensions enabled: ```sh duckdb -unsigned ``` ```sql -LOAD 'build/release/extension/duck_diff/duck_diff.duckdb_extension'; +LOAD 'build/release/extension/table_diff/table_diff.duckdb_extension'; SELECT * FROM table_diff('FROM a', 'FROM b', pk := 'id'); ``` -> **Installing without building:** each [GitHub Release](https://github.com/avaitla/duck_diff/releases) +> **Installing without building:** each [GitHub Release](https://github.com/avaitla/duckdb-table-diff/releases) > attaches signed, per-platform `.duckdb_extension` binaries (see > [docs/DISTRIBUTION.md](docs/DISTRIBUTION.md)). 
Download the one for your -> platform, **saved as `duck_diff.duckdb_extension`** (DuckDB derives the +> platform, **saved as `table_diff.duckdb_extension`** (DuckDB derives the > extension name from the filename), then `LOAD` it under `-unsigned`: > ```sh -> curl -L -o duck_diff.duckdb_extension \ -> https://github.com/avaitla/duck_diff/releases/download/v0.1.0/duck_diff-v1.5.2-osx_arm64.duckdb_extension -> duckdb -unsigned -c "LOAD 'duck_diff.duckdb_extension'; SELECT * FROM table_diff('FROM a','FROM b', pk:='id');" +> curl -L -o table_diff.duckdb_extension \ +> https://github.com/avaitla/duckdb-table-diff/releases/download/v0.1.0/table_diff-v1.5.2-osx_arm64.duckdb_extension +> duckdb -unsigned -c "LOAD 'table_diff.duckdb_extension'; SELECT * FROM table_diff('FROM a','FROM b', pk:='id');" > ``` ## TODO diff --git a/docs/DESIGN.md b/docs/DESIGN.md index 8268e6b..6b4b979 100644 --- a/docs/DESIGN.md +++ b/docs/DESIGN.md @@ -1,4 +1,4 @@ -# duck_diff — Design +# table_diff — Design A focused DuckDB extension that diffs two relations on a primary key and reports, per key, whether it is identical, different, or exists only on one side. diff --git a/docs/DISTRIBUTION.md b/docs/DISTRIBUTION.md index 401407f..28d92f9 100644 --- a/docs/DISTRIBUTION.md +++ b/docs/DISTRIBUTION.md @@ -1,6 +1,6 @@ # Distribution — signed binaries on GitHub Releases -`.github/workflows/Release.yml` builds `duck_diff` for every native platform on +`.github/workflows/Release.yml` builds `table_diff` for every native platform on each GitHub Release, signs each binary, and attaches the signed `.duckdb_extension` files (plus `SHA256SUMS`) to the release as assets. @@ -19,7 +19,7 @@ simpler, fully self-contained route. ```sh openssl genrsa -out private.pem 2048 -openssl rsa -in private.pem -pubout -out duck_diff-signing-key.pub # public half (committed) +openssl rsa -in private.pem -pubout -out table_diff-signing-key.pub # public half (committed) ``` Keep `private.pem` out of the repo. 
@@ -47,13 +47,13 @@ stamps the extension version from the tag (`git describe`). So a release is: 3. **Cut the release** (tag + publish in one step): ```sh git checkout main && git pull - gh release create v0.2.0 --target main --title "duck_diff v0.2.0" --notes "see workflow" + gh release create v0.2.0 --target main --title "table_diff v0.2.0" --notes "see workflow" ``` (Or GitHub UI → **Releases → Draft a new release** → create tag `v0.2.0` on `main` → **Publish**.) 4. **Done** — publishing fires `Release.yml`, which builds every platform, signs each binary, attaches them as - `duck_diff--.duckdb_extension` + `SHA256SUMS`, and + `table_diff--.duckdb_extension` + `SHA256SUMS`, and rewrites the release notes with install instructions, the source commit, and the checksums. Watch it with `gh run watch` if you like. @@ -69,17 +69,17 @@ stamps the extension version from the tag (`git describe`). So a release is: ## Installing (as a user) Download the `*.duckdb_extension` matching your DuckDB version and platform from -the release assets and **save it as `duck_diff.duckdb_extension`** — DuckDB +the release assets and **save it as `table_diff.duckdb_extension`** — DuckDB derives the extension name and entrypoint from the filename, so the name matters. It's signed with a third-party key, so launch with `-unsigned`: ```sh -curl -L -o duck_diff.duckdb_extension \ - https://github.com//duck_diff/releases/download/v0.1.0/duck_diff-v1.5.2-osx_arm64.duckdb_extension +curl -L -o table_diff.duckdb_extension \ + https://github.com//duckdb-table-diff/releases/download/v0.1.0/table_diff-v1.5.2-osx_arm64.duckdb_extension duckdb -unsigned ``` ```sql -LOAD 'duck_diff.duckdb_extension'; +LOAD 'table_diff.duckdb_extension'; FROM table_diff('FROM a', 'FROM b', pk := 'id'); ``` From a client library, enable unsigned extensions in the connection config (e.g. @@ -89,7 +89,7 @@ Python: `duckdb.connect(config={'allow_unsigned_extensions': True})`). 
Each release ships `SHA256SUMS` (also inlined in the notes) and is signed with the key whose public half is committed at -[`duck_diff-signing-key.pub`](../duck_diff-signing-key.pub): +[`table_diff-signing-key.pub`](../table_diff-signing-key.pub): ``` -----BEGIN PUBLIC KEY----- @@ -113,7 +113,7 @@ payload is the SHA256 composite of everything before it (1 MiB chunks each hashed, then the concatenation hashed — DuckDB's `compute-extension-hash.sh`): ```sh -F=duck_diff-v1.5.2-osx_arm64.duckdb_extension +F=table_diff-v1.5.2-osx_arm64.duckdb_extension size=$(wc -c < "$F") head -c $((size - 256)) "$F" > body tail -c 256 "$F" > sig @@ -121,7 +121,7 @@ tail -c 256 "$F" > sig split -b 1M body seg_ for f in seg_*; do openssl dgst -binary -sha256 "$f" >> chunks; rm "$f"; done openssl dgst -binary -sha256 chunks > hash -openssl pkeyutl -verify -pubin -inkey duck_diff-signing-key.pub \ +openssl pkeyutl -verify -pubin -inkey table_diff-signing-key.pub \ -sigfile sig -in hash -pkeyopt digest:sha256 # -> Signature Verified Successfully ``` diff --git a/docs/functions.md b/docs/functions.md index 6710413..6aaa51b 100644 --- a/docs/functions.md +++ b/docs/functions.md @@ -1,4 +1,4 @@ -# duck_diff function reference +# table_diff function reference `table_diff`, `table_diff_summary`, and `schema_diff` are table functions. The two relations are passed as **query strings**, written the way you diff --git a/examples/README.md b/examples/README.md index ac082f8..2436f32 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1,7 +1,7 @@ # Testing your own SQL with `table_diff` — `duckdb` CLI only A copy-paste-into-your-project demonstration of how to write your own -regression tests with duck_diff's `table_diff`, in the [sqllogictest][slt] +regression tests with the table_diff extension's `table_diff`, in the [sqllogictest][slt] format, needing **nothing but the `duckdb` CLI** — no source build, no `unittest` binary. 
@@ -10,7 +10,7 @@ make setup # checks that duckdb is on PATH make test # runs every tests/*.test ``` -The examples assume the `duck_diff` extension is installed (see the +The examples assume the `table_diff` extension is installed (see the [top-level README](../README.md#install)); each test `LOAD`s it. ## The examples @@ -25,7 +25,7 @@ A test defines a golden table, runs your transformation, and asserts that ``` statement ok -LOAD duck_diff; +LOAD table_diff; statement ok CREATE TABLE actual_revenue AS diff --git a/examples/run_sqllogictest.sh b/examples/run_sqllogictest.sh index c35a0f9..2b7e16c 100755 --- a/examples/run_sqllogictest.sh +++ b/examples/run_sqllogictest.sh @@ -26,7 +26,7 @@ DUCKDB="${DUCKDB:-duckdb}" TAB="$(printf '\t')" # `-unsigned` is harmless when no extension is used; it lets your tests -# `LOAD duck_diff;` (or any installed extension) if you want richer assertions. +# `LOAD table_diff;` (or any installed extension) if you want richer assertions. # Run a statement; output (incl. errors) on stdout, exit code preserved. slt_stmt() { "$DUCKDB" "$1" -unsigned -batch -init /dev/null -c "$2" 2>&1; } diff --git a/extension_config.cmake b/extension_config.cmake index d12c502..f9cda08 100644 --- a/extension_config.cmake +++ b/extension_config.cmake @@ -1,10 +1,10 @@ # This file is included by DuckDB's build system. It specifies which extension to load # Extension from this repo -duckdb_extension_load(duck_diff +duckdb_extension_load(table_diff SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR} ) -# duck_diff generates SQL that uses json_object / json_merge_patch, so the json +# table_diff generates SQL that uses json_object / json_merge_patch, so the json # extension must be available. Build it in so tests can `require json`. 
duckdb_extension_load(json) diff --git a/src/include/duck_diff_extension.hpp b/src/include/table_diff_extension.hpp similarity index 81% rename from src/include/duck_diff_extension.hpp rename to src/include/table_diff_extension.hpp index f4183d3..09bd3cc 100644 --- a/src/include/duck_diff_extension.hpp +++ b/src/include/table_diff_extension.hpp @@ -4,7 +4,7 @@ namespace duckdb { -class DuckDiffExtension : public Extension { +class TableDiffExtension : public Extension { public: void Load(ExtensionLoader &db) override; std::string Name() override; diff --git a/src/duck_diff_extension.cpp b/src/table_diff_extension.cpp similarity index 98% rename from src/duck_diff_extension.cpp rename to src/table_diff_extension.cpp index 24b9a39..49e8200 100644 --- a/src/duck_diff_extension.cpp +++ b/src/table_diff_extension.cpp @@ -1,6 +1,6 @@ #define DUCKDB_EXTENSION_MAIN -#include "duck_diff_extension.hpp" +#include "table_diff_extension.hpp" #include "duckdb.hpp" #include "duckdb/common/exception.hpp" #include "duckdb/common/string_util.hpp" @@ -735,17 +735,17 @@ void LoadInternal(ExtensionLoader &loader) { } // namespace -void DuckDiffExtension::Load(ExtensionLoader &loader) { +void TableDiffExtension::Load(ExtensionLoader &loader) { LoadInternal(loader); } -std::string DuckDiffExtension::Name() { - return "duck_diff"; +std::string TableDiffExtension::Name() { + return "table_diff"; } -std::string DuckDiffExtension::Version() const { -#ifdef EXT_VERSION_DUCK_DIFF - return EXT_VERSION_DUCK_DIFF; +std::string TableDiffExtension::Version() const { +#ifdef EXT_VERSION_TABLE_DIFF + return EXT_VERSION_TABLE_DIFF; #else return ""; #endif @@ -755,7 +755,7 @@ std::string DuckDiffExtension::Version() const { extern "C" { -DUCKDB_CPP_EXTENSION_ENTRY(duck_diff, loader) { +DUCKDB_CPP_EXTENSION_ENTRY(table_diff, loader) { duckdb::LoadInternal(loader); } } diff --git a/duck_diff-signing-key.pub b/table_diff-signing-key.pub similarity index 100% rename from duck_diff-signing-key.pub rename to 
table_diff-signing-key.pub diff --git a/test/sql/schema_diff.test b/test/sql/schema_diff.test index 73f91bc..a82cd97 100644 --- a/test/sql/schema_diff.test +++ b/test/sql/schema_diff.test @@ -2,7 +2,7 @@ # description: schema_diff -- compare column names and types of two relations # group: [sql] -require duck_diff +require table_diff require json diff --git a/test/sql/table_diff.test b/test/sql/table_diff.test index 9792ef6..c45cdd7 100644 --- a/test/sql/table_diff.test +++ b/test/sql/table_diff.test @@ -11,7 +11,7 @@ SELECT * FROM table_diff('FROM l', 'FROM r', pk := 'id'); ---- Catalog Error -require duck_diff +require table_diff require json diff --git a/test/sql/table_diff_composite.test b/test/sql/table_diff_composite.test index 9840f78..d992144 100644 --- a/test/sql/table_diff_composite.test +++ b/test/sql/table_diff_composite.test @@ -2,7 +2,7 @@ # description: composite primary key (pk as a list) and join-back # group: [sql] -require duck_diff +require table_diff require json diff --git a/test/sql/table_diff_context.test b/test/sql/table_diff_context.test index b8ab13e..4156bf1 100644 --- a/test/sql/table_diff_context.test +++ b/test/sql/table_diff_context.test @@ -2,7 +2,7 @@ # description: context columns -- 'context' pulls extra (non-compared) columns into the _left/_right expansion # group: [sql] -require duck_diff +require table_diff require json diff --git a/test/sql/table_diff_errors.test b/test/sql/table_diff_errors.test index f710be4..15c2da0 100644 --- a/test/sql/table_diff_errors.test +++ b/test/sql/table_diff_errors.test @@ -2,7 +2,7 @@ # description: v1 error cases (required pk, missing key, duplicate keys) # group: [sql] -require duck_diff +require table_diff require json diff --git a/test/sql/table_diff_expand.test b/test/sql/table_diff_expand.test index fc089a8..2d0a7d2 100644 --- a/test/sql/table_diff_expand.test +++ b/test/sql/table_diff_expand.test @@ -2,7 +2,7 @@ # description: per-column expansion -- compared columns always emit 
_left/_right (native types) + _diff_status, context columns emit _left/_right # group: [sql] -require duck_diff +require table_diff require json diff --git a/test/sql/table_diff_normalize.test b/test/sql/table_diff_normalize.test index f7a01e9..050a19c 100644 --- a/test/sql/table_diff_normalize.test +++ b/test/sql/table_diff_normalize.test @@ -2,7 +2,7 @@ # description: value-normalization flags (numeric_tolerance, timestamp_precision, null_equals_empty) # group: [sql] -require duck_diff +require table_diff require json diff --git a/test/sql/table_diff_schema.test b/test/sql/table_diff_schema.test index e89ca5f..2f7858c 100644 --- a/test/sql/table_diff_schema.test +++ b/test/sql/table_diff_schema.test @@ -2,7 +2,7 @@ # description: schema reconciliation (require_matching_columns / upcast_types, columns, ignore, type mismatch, prefix/collision) # group: [sql] -require duck_diff +require table_diff require json diff --git a/test/sql/table_diff_summary.test b/test/sql/table_diff_summary.test index 2dad98c..8277805 100644 --- a/test/sql/table_diff_summary.test +++ b/test/sql/table_diff_summary.test @@ -2,7 +2,7 @@ # description: table_diff_summary counts and equality check # group: [sql] -require duck_diff +require table_diff require json diff --git a/test/sql/table_diff_types.test b/test/sql/table_diff_types.test index 53403d9..f1ddcf1 100644 --- a/test/sql/table_diff_types.test +++ b/test/sql/table_diff_types.test @@ -2,7 +2,7 @@ # description: table_diff across a range of data types (bool, bigint, decimal, date, timestamp, blob) # group: [sql] -require duck_diff +require table_diff require json