diff --git a/docs/reference/cli.md b/docs/reference/cli.md
index 56becfc..4d5b85f 100644
--- a/docs/reference/cli.md
+++ b/docs/reference/cli.md
@@ -606,6 +606,7 @@ datasight quality [OPTIONS]
 | --- | --- |
 | `--project-dir` | Project directory containing .env and config files. Default: `.`. |
 | `--table` | Audit a specific table. |
+| `--deep` | Run expensive detectors: whole-row and PK-shaped duplicates, text whitespace/empty-string flags, IQR-based numeric outliers, and orphan foreign-key-shaped values. |
 | `--format` | Output format (default: table). Default: `table`. |
 | `--output`, `-o` | Write the quality audit to a file instead of stdout. |
 
diff --git a/docs/use/how-to/audit-data-quality.md b/docs/use/how-to/audit-data-quality.md
index 9592f62..75fae68 100644
--- a/docs/use/how-to/audit-data-quality.md
+++ b/docs/use/how-to/audit-data-quality.md
@@ -65,6 +65,40 @@ Use it to spot:
 - quick notes worth turning into follow-up questions
 - temporal completeness issues when [`time_series.yaml`](../../project-setup/how-to/declare-time-series.md) is present
 
+The default pass batches per-column null and numeric scans into one query
+per table, so it stays cheap on wide schemas.
+
+### Deeper checks: `datasight quality --deep`
+
+Add `--deep` to run the more expensive detectors and emit previewable
+cleanup SQL alongside each finding:
+
+```bash
+datasight quality --deep
+datasight quality --deep --format markdown -o quality-deep.md
+```
+
+`--deep` adds:
+
+- **Whole-row duplicates** — rows that are exact duplicates across all columns
+- **Primary-key-shaped duplicates** — values appearing more than once in
+  any `id`, `*_id`, or `id_*` column
+- **Text cleanliness** — counts of values with leading or trailing
+  whitespace, and counts of empty strings used in place of NULL
+- **Numeric outliers (IQR)** — counts of values outside the
+  `[Q1 − 1.5·IQR, Q3 + 1.5·IQR]` fence (skipped on SQLite, which has no
+  percentile aggregate)
+- **Orphan foreign-key-shaped values** — values in `<parent>_id` columns
+  that don't appear in `<parent>.<id>`, detected against any parent
+  table with exactly one ID-shaped column
+
+Each finding includes a `cleanup_sql` field with a previewable `SELECT`
+that shows the candidate rows. Destructive forms
+(`UPDATE`, `CREATE OR REPLACE TABLE`) appear only as comments inside the
+preview — datasight never auto-mutates your tables. The CLI table and
+markdown outputs render the cleanup SQL in a dedicated
+**Suggested Cleanup** section.
+
 ## Detect untidy column shapes
 
 !!! warning "Experimental"
@@ -398,7 +432,7 @@ directory name appears in the report title.
 For a thorough data quality audit, run the commands in this order:
 
 1. **Profile** — understand the shape of the data
-2. **Quality** — find nulls, range issues, date gaps, and untidy column shapes
+2. **Quality** — find nulls, range issues, date gaps, and untidy column shapes (add `--deep` for duplicates, text cleanliness, outliers, and orphan-FK checks with previewable cleanup SQL)
 3. **Integrity** — verify primary keys, foreign keys, and join behavior
 4. **Distribution** — inspect percentiles, outliers, and temporal spikes
 5. **Measures** — identify metrics and verify aggregation defaults
diff --git a/src/datasight/audit_report.py b/src/datasight/audit_report.py
index 840d280..32d885e 100644
--- a/src/datasight/audit_report.py
+++ b/src/datasight/audit_report.py
@@ -20,10 +20,15 @@ async def build_audit_report(
     validation_rules: list[dict[str, Any]] | None = None,
     declared_joins: list[dict[str, Any]] | None = None,
     project_name: str | None = None,
+    *,
+    sql_dialect: str = "duckdb",
+    deep: bool = False,
 ) -> dict[str, Any]:
     """Run all audit checks and assemble a composite report."""
     dataset_overview = await build_dataset_overview(schema_info, run_sql)
-    quality = await build_quality_overview(schema_info, run_sql)
+    quality = await build_quality_overview(
+        schema_info, run_sql, sql_dialect=sql_dialect, deep=deep
+    )
     integrity = await build_integrity_overview(schema_info, run_sql, declared_joins)
     distribution = await build_distribution_overview(schema_info, run_sql, overrides)
 
diff --git a/src/datasight/cleanup.py b/src/datasight/cleanup.py
new file mode 100644
index 0000000..58c3f20
--- /dev/null
+++ b/src/datasight/cleanup.py
@@ -0,0 +1,130 @@
+"""Emit previewable cleanup SQL for findings from ``build_quality_overview``.
+
+Mirrors the ``tidy`` module's preview-first ethos: each function returns a
+single SQL string that lets the user *see* the rows in question, never an
+UPDATE/DELETE that auto-mutates the table. The returned SQL is safe to copy
+into a query window or attach to a quality report.
+
+For destructive operations (deduplication, NULLIF rewrites, TRIM rewrites)
+the preview is a ``SELECT`` that shows the candidate rows or the rewritten
+column alongside the original; the caller can wrap it in an
+``UPDATE``/``CREATE OR REPLACE TABLE`` once they've reviewed the preview.
+
+Dialects supported: ``duckdb``, ``sqlite``, ``postgres``. Where a dialect
+lacks a feature (e.g. SQLite has no ``QUALIFY`` and no percentile
+aggregates), the function falls back to portable SQL.
+"""
+
+from __future__ import annotations
+
+from datasight.schema import _quote_identifier
+
+
+def empty_string_preview(table: str, column: str, dialect: str) -> str:
+    """Return a ``SELECT`` listing rows whose column equals ``''``; ``dialect`` is unused."""
+    tbl = _quote_identifier(table)
+    col = _quote_identifier(column)
+    predicate = f"{col} = ''"
+    # The UPDATE is only ever emitted inside the leading SQL comment, never as a statement.
+    hint = f"-- Rows where {column!r} is an empty string. To fix: UPDATE {tbl} SET {col} = NULL"
+    preview = f"SELECT * FROM {tbl} WHERE {predicate};"
+    return f"{hint} WHERE {predicate};\n{preview}"
+
+
+def whitespace_preview(table: str, column: str, dialect: str) -> str:
+    """Return a ``SELECT`` of padded values beside their ``TRIM`` form; ``dialect`` is unused."""
+    tbl = _quote_identifier(table)
+    col = _quote_identifier(column)
+    trimmed = f"TRIM({col})"
+    differs = f"{col} <> {trimmed}"  # matches the values that TRIM would change
+    header = f"-- Rows where {column!r} has leading/trailing whitespace. "
+    fix_hint = f"To fix: UPDATE {tbl} SET {col} = {trimmed} WHERE {differs};"
+    preview = f"SELECT {col} AS original, {trimmed} AS trimmed FROM {tbl} WHERE {col} IS NOT NULL"
+    return f"{header}{fix_hint}\n{preview} AND {differs};"
+
+
+def whole_row_dedup_preview(table: str, dialect: str) -> str:
+    """Preview a deduplicated copy of the table; the rewrite appears only as a SQL comment."""
+    qt = _quote_identifier(table)
+    if dialect == "duckdb":
+        materialize = (
+            f"-- To materialize: CREATE OR REPLACE TABLE {qt} AS SELECT DISTINCT * FROM {qt};"
+        )
+    else:
+        # Quote the full target name: a suffix appended after quoting lands outside the quotes.
+        qd = _quote_identifier(f"{table}_deduped")
+        begin = f"-- To materialize: BEGIN; DROP TABLE IF EXISTS {qd}; "
+        materialize = f"{begin}CREATE TABLE {qd} AS SELECT DISTINCT * FROM {qt}; COMMIT;"
+    return f"{materialize}\nSELECT DISTINCT * FROM {qt};"
+
+
+def pk_dedup_preview(table: str, pk_column: str, dialect: str) -> str:
+    """Build a preview that keeps a single row for each value of ``pk_column``.
+
+    ``duckdb`` filters with ``QUALIFY``, ``postgres`` ranks rows in a CTE, and
+    every other dialect takes the ``MIN(rowid)`` form written for SQLite. The
+    ``postgres`` result carries the helper ``rn`` column alongside the table's.
+    """
+    tbl = _quote_identifier(table)
+    key = _quote_identifier(pk_column)
+    header = f"-- One canonical row per duplicate {pk_column!r} value"
+    # Both window-function variants number the rows of each key partition.
+    row_number = f"ROW_NUMBER() OVER (PARTITION BY {key} ORDER BY {key})"
+    if dialect == "duckdb":
+        body = f"SELECT * FROM {tbl} QUALIFY {row_number} = 1;"
+    elif dialect == "postgres":
+        body = (
+            "WITH ranked AS (\n"
+            f"  SELECT *, {row_number} AS rn FROM {tbl}\n"
+            ") SELECT * FROM ranked WHERE rn = 1;"
+        )
+    else:
+        # Fallback for SQLite and any dialect not matched above.
+        header += " (SQLite uses rowid)"
+        body = (
+            f"SELECT * FROM {tbl} WHERE rowid IN "
+            f"(SELECT MIN(rowid) FROM {tbl} GROUP BY {key});"
+        )
+    return f"{header}.\n{body}"
+
+
+def outlier_preview(table: str, column: str, q1: str | None, q3: str | None, dialect: str) -> str:
+    """Build a preview of the rows lying beyond the 1.5 * IQR fence of ``column``.
+
+    ``q1`` and ``q3`` are spliced into the SQL text verbatim as the quartile
+    literals. When either one is ``None`` the preview instead lists the 20
+    rows with the largest non-NULL values so the fence can be recomputed.
+    """
+    tbl = _quote_identifier(table)
+    col = _quote_identifier(column)
+    non_null = f"SELECT * FROM {tbl} WHERE {col} IS NOT NULL"
+    if q1 is not None and q3 is not None:
+        # Lower and upper fences share the same 1.5 * IQR margin.
+        spread = f"1.5 * ({q3} - {q1})"
+        below = f"{col} < {q1} - {spread}"
+        above = f"{col} > {q3} + {spread}"
+        header = f"-- Rows in {column!r} outside the IQR fence [q1={q1}, q3={q3}]."
+        return f"{header}\n{non_null} AND ({below} OR {above});"
+    # No usable quartiles: fall back to showing the top of the distribution.
+    header = f"-- Inspect outliers in {column!r} (recompute IQR fence as needed)."
+    return f"{header}\n{non_null} ORDER BY {col} DESC LIMIT 20;"
+
+
+def orphan_fk_preview(
+    table: str,
+    column: str,
+    parent_table: str,
+    parent_column: str,
+    dialect: str,
+) -> str:
+    """Build a preview of distinct non-NULL child values missing from the parent column."""
+    child_tbl = _quote_identifier(table)
+    child_col = _quote_identifier(column)
+    parent_tbl = _quote_identifier(parent_table)
+    parent_col = _quote_identifier(parent_column)
+    # NULL parent keys are excluded: a NULL in the list would stop NOT IN from ever being true.
+    known = f"SELECT {parent_col} FROM {parent_tbl} WHERE {parent_col} IS NOT NULL"
+    header = f"-- Distinct {table}.{column} values not present in {parent_table}.{parent_column}."
+    select = f"SELECT DISTINCT {child_col} FROM {child_tbl}"
+    condition = f"{child_col} IS NOT NULL AND {child_col} NOT IN ({known})"
+    return f"{header}\n{select} WHERE {condition};"
diff --git a/src/datasight/cli.py b/src/datasight/cli.py
index 525b549..4955f76 100644
--- a/src/datasight/cli.py
+++ b/src/datasight/cli.py
@@ -496,6 +496,73 @@ def render_quality_markdown(quality_data: dict[str, Any]) -> str:  # noqa: C901
         lines.extend(["", "## Wide Tables"])
         for item in quality_data["wide_tables"]:
             lines.append(f"- `{item['table']}`: {item['reason']}")
+
+    cleanup_blocks: list[tuple[str, str]] = []
+
+    if quality_data.get("duplicate_rows"):
+        lines.extend(["", "## Whole-Row Duplicates"])
+        for item in quality_data["duplicate_rows"]:
+            lines.append(f"- `{item['table']}`: {item['duplicate_count']} duplicate row(s)")
+            if item.get("cleanup_sql"):
+                cleanup_blocks.append((f"{item['table']} (whole-row dedup)", item["cleanup_sql"]))
+    if quality_data.get("pk_duplicates"):
+        lines.extend(["", "## Primary-Key-Shaped Duplicates"])
+        for item in quality_data["pk_duplicates"]:
+            sample = ", ".join(f"{e['value']} (×{e['count']})" for e in item["examples"][:3])
+            lines.append(f"- `{item['table']}.{item['column']}`: duplicate values — {sample}")
+            if item.get("cleanup_sql"):
+                cleanup_blocks.append(
+                    (f"{item['table']}.{item['column']} (PK dedup)", item["cleanup_sql"])
+                )
+    if quality_data.get("text_flags"):
+        lines.extend(["", "## Text Cleanliness"])
+        for item in quality_data["text_flags"]:
+            lines.append(
+                f"- `{item['table']}.{item['column']}`: {item['issue']} ({item['count']} row(s))"
+            )
+            if item.get("cleanup_sql"):
+                cleanup_blocks.append(
+                    (
+                        f"{item['table']}.{item['column']} ({item['issue']})",
+                        item["cleanup_sql"],
+                    )
+                )
+    if quality_data.get("outlier_flags"):
+        lines.extend(["", "## Numeric Outliers (IQR)"])
+        for item in quality_data["outlier_flags"]:
+            lines.append(
+                f"- `{item['table']}.{item['column']}`: {item['outlier_count']} row(s) outside "
+                f"IQR fence [q1={item.get('q1')}, q3={item.get('q3')}]"
+            )
+            if item.get("cleanup_sql"):
+                cleanup_blocks.append(
+                    (f"{item['table']}.{item['column']} (outliers)", item["cleanup_sql"])
+                )
+    if quality_data.get("orphan_flags"):
+        lines.extend(["", "## Orphan Foreign-Key-Shaped Values"])
+        for item in quality_data["orphan_flags"]:
+            lines.append(
+                f"- `{item['table']}.{item['column']}` → `{item['parent_table']}.{item['parent_column']}`: "
+                f"{item['orphan_count']} orphan value(s)"
+            )
+            if item.get("cleanup_sql"):
+                cleanup_blocks.append(
+                    (
+                        f"{item['table']}.{item['column']} (orphans → {item['parent_table']})",
+                        item["cleanup_sql"],
+                    )
+                )
+
+    if cleanup_blocks:
+        lines.extend(["", "## Suggested Cleanup"])
+        for title, sql in cleanup_blocks:
+            lines.append("")
+            lines.append(f"### {title}")
+            lines.append("")
+            lines.append("```sql")
+            lines.extend(sql.splitlines())
+            lines.append("```")
+
     if quality_data["notes"]:
         lines.extend(["", "## Notes"])
         for item in quality_data["notes"]:
diff --git a/src/datasight/cli_commands/audit_report.py b/src/datasight/cli_commands/audit_report.py
index 17f4731..421f513 100644
--- a/src/datasight/cli_commands/audit_report.py
+++ b/src/datasight/cli_commands/audit_report.py
@@ -104,6 +104,7 @@ async def _run_audit_report():
             validation_rules,
             declared_joins,
             project_name=Path(project_dir).name,
+            sql_dialect=settings.database.sql_dialect,
         )
 
     report_data = asyncio.run(_run_audit_report())
diff --git a/src/datasight/cli_commands/inspect.py b/src/datasight/cli_commands/inspect.py
index 06b2e66..dd56bbd 100644
--- a/src/datasight/cli_commands/inspect.py
+++ b/src/datasight/cli_commands/inspect.py
@@ -94,7 +94,12 @@ async def _run_all():
             "profiling tables", build_dataset_overview(schema_info, runner.run_sql)
         )
         quality_data = await _run_phase(
-            "running quality checks", build_quality_overview(schema_info, runner.run_sql)
+            "running quality checks",
+            build_quality_overview(
+                schema_info,
+                runner.run_sql,
+                sql_dialect=(db_settings.sql_dialect if db_settings else "duckdb"),
+            ),
         )
         measure_data = await _run_phase(
             "discovering measures",
diff --git a/src/datasight/cli_commands/quality.py b/src/datasight/cli_commands/quality.py
index b46b1b2..0e1c006 100644
--- a/src/datasight/cli_commands/quality.py
+++ b/src/datasight/cli_commands/quality.py
@@ -35,6 +35,16 @@
     help="Project directory containing .env and config files.",
 )
 @click.option("--table", default=None, help="Audit a specific table.")
+@click.option(
+    "--deep",
+    is_flag=True,
+    default=False,
+    help=(
+        "Run expensive detectors: whole-row and PK-shaped duplicates, text "
+        "whitespace/empty-string flags, IQR-based numeric outliers, and "
+        "orphan foreign-key-shaped values."
+    ),
+)
 @click.option(
     "--format",
     "output_format",
@@ -50,7 +60,7 @@
     default=None,
     help="Write the quality audit to a file instead of stdout.",
 )
-def quality(project_dir, table, output_format, output_path):  # noqa: C901
+def quality(project_dir, table, deep, output_format, output_path):  # noqa: C901
     """Audit data quality - nulls, suspicious ranges, and date coverage.
 
     Also checks temporal completeness when time_series.yaml defines expected
@@ -79,7 +89,12 @@ async def _run_quality():
                 msg = f"Table not found: {table}"
                 raise click.ClickException(msg)
             schema_info = [table_info]
-        base = await build_quality_overview(schema_info, sql_runner.run_sql)
+        base = await build_quality_overview(
+            schema_info,
+            sql_runner.run_sql,
+            sql_dialect=settings.database.sql_dialect,
+            deep=deep,
+        )
         ts_configs = time_series_configs
         if table and ts_configs:
             ts_configs = [c for c in ts_configs if c["table"].lower() == table.lower()]
@@ -220,6 +235,121 @@ async def _run_quality():
                 ],
             )
         )
+    cleanup_blocks: list[tuple[str, str]] = []
+    if quality_data.get("duplicate_rows"):
+        console.print(
+            cli.build_profile_detail_table(
+                "Whole-Row Duplicates",
+                [("Table", "left"), ("Duplicate rows", "right")],
+                [
+                    [item["table"], str(item["duplicate_count"])]
+                    for item in quality_data["duplicate_rows"]
+                ],
+            )
+        )
+        for item in quality_data["duplicate_rows"]:
+            if item.get("cleanup_sql"):
+                cleanup_blocks.append((f"{item['table']} (whole-row dedup)", item["cleanup_sql"]))
+    if quality_data.get("pk_duplicates"):
+        console.print(
+            cli.build_profile_detail_table(
+                "Primary-Key-Shaped Duplicates",
+                [("Column", "left"), ("Sample duplicates", "left")],
+                [
+                    [
+                        f"{item['table']}.{item['column']}",
+                        ", ".join(f"{e['value']} (×{e['count']})" for e in item["examples"][:3]),
+                    ]
+                    for item in quality_data["pk_duplicates"]
+                ],
+            )
+        )
+        for item in quality_data["pk_duplicates"]:
+            if item.get("cleanup_sql"):
+                cleanup_blocks.append(
+                    (f"{item['table']}.{item['column']} (PK dedup)", item["cleanup_sql"])
+                )
+    if quality_data.get("text_flags"):
+        console.print(
+            cli.build_profile_detail_table(
+                "Text Cleanliness",
+                [("Column", "left"), ("Issue", "left"), ("Rows", "right")],
+                [
+                    [
+                        f"{item['table']}.{item['column']}",
+                        item["issue"],
+                        str(item["count"]),
+                    ]
+                    for item in quality_data["text_flags"]
+                ],
+            )
+        )
+        for item in quality_data["text_flags"]:
+            if item.get("cleanup_sql"):
+                cleanup_blocks.append(
+                    (
+                        f"{item['table']}.{item['column']} ({item['issue']})",
+                        item["cleanup_sql"],
+                    )
+                )
+    if quality_data.get("outlier_flags"):
+        console.print(
+            cli.build_profile_detail_table(
+                "Numeric Outliers (IQR)",
+                [("Column", "left"), ("Rows", "right"), ("Q1", "right"), ("Q3", "right")],
+                [
+                    [
+                        f"{item['table']}.{item['column']}",
+                        str(item["outlier_count"]),
+                        cli.format_profile_value(item.get("q1")),
+                        cli.format_profile_value(item.get("q3")),
+                    ]
+                    for item in quality_data["outlier_flags"]
+                ],
+            )
+        )
+        for item in quality_data["outlier_flags"]:
+            if item.get("cleanup_sql"):
+                cleanup_blocks.append(
+                    (f"{item['table']}.{item['column']} (outliers)", item["cleanup_sql"])
+                )
+    if quality_data.get("orphan_flags"):
+        console.print(
+            cli.build_profile_detail_table(
+                "Orphan Foreign-Key-Shaped Values",
+                [("Child", "left"), ("Parent", "left"), ("Orphans", "right")],
+                [
+                    [
+                        f"{item['table']}.{item['column']}",
+                        f"{item['parent_table']}.{item['parent_column']}",
+                        str(item["orphan_count"]),
+                    ]
+                    for item in quality_data["orphan_flags"]
+                ],
+            )
+        )
+        for item in quality_data["orphan_flags"]:
+            if item.get("cleanup_sql"):
+                cleanup_blocks.append(
+                    (
+                        f"{item['table']}.{item['column']} (orphans → {item['parent_table']})",
+                        item["cleanup_sql"],
+                    )
+                )
+    if cleanup_blocks:
+        from rich.panel import Panel
+        from rich.syntax import Syntax
+
+        console.print()
+        console.print("[bold]Suggested Cleanup[/bold]")
+        for title, sql in cleanup_blocks:
+            console.print(
+                Panel(
+                    Syntax(sql, "sql", theme="ansi_dark", word_wrap=True),
+                    title=title,
+                    border_style="dim",
+                )
+            )
     if quality_data["notes"]:
         console.print(
             cli.build_profile_detail_table(
diff --git a/src/datasight/data_profile.py b/src/datasight/data_profile.py
index d8f2abf..033b7d6 100644
--- a/src/datasight/data_profile.py
+++ b/src/datasight/data_profile.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import math
 from typing import Any
 
 import yaml
@@ -841,28 +842,50 @@ def format_measure_prompt_context(measure_data: dict[str, Any]) -> str:
 async def build_quality_overview(  # noqa: C901
     schema_info: list[dict[str, Any]],
     run_sql: RunSql,
+    *,
+    sql_dialect: str = "duckdb",
+    deep: bool = False,
 ) -> dict[str, Any]:
-    """Build a deterministic overview focused on data quality signals."""
+    """Build a deterministic overview focused on data quality signals.
+
+    ``sql_dialect`` selects between ``duckdb``, ``sqlite``, and ``postgres``
+    SQL where the checks need dialect-specific syntax (percentile, qualify,
+    multi-column distinct). ``deep=True`` enables the more expensive
+    detectors: whole-row duplicates, PK-shaped duplicates, text-cleanliness
+    flags, IQR-based numeric outliers, and orphan foreign-key-shaped values.
+    """
     null_columns: list[dict[str, Any]] = []
     numeric_flags: list[dict[str, Any]] = []
     date_columns: list[dict[str, Any]] = []
+    duplicate_rows: list[dict[str, Any]] = []
+    pk_duplicates: list[dict[str, Any]] = []
+    text_flags: list[dict[str, Any]] = []
+    outlier_flags: list[dict[str, Any]] = []
+    orphan_flags: list[dict[str, Any]] = []
     notes: list[str] = []
 
+    parent_keys = _index_pk_shaped_columns(schema_info) if deep else {}
+
     for table in schema_info:
         table_name = table["name"]
-        row_count = table.get("row_count")
-        for column in table.get("columns", []):
+        schema_row_count = table.get("row_count")
+        columns = table.get("columns", [])
+        if not columns:
+            continue
+
+        batch = await _batched_column_scan(
+            run_sql, table_name, columns, deep=deep, sql_dialect=sql_dialect
+        )
+        row_count = schema_row_count if schema_row_count is not None else batch.get("__row_count")
+
+        for column in columns:
             column_name = column["name"]
             dtype = column.get("dtype", "")
+            stats = batch.get(column_name)
+            if not stats:
+                continue
 
-            null_count = await _run_scalar(
-                run_sql,
-                (
-                    f"SELECT SUM(CASE WHEN {_quote_identifier(column_name)} IS NULL THEN 1 ELSE 0 END) "
-                    f"AS value FROM {_quote_identifier(table_name)}"
-                ),
-                "value",
-            )
+            null_count = stats.get("null_count")
             if null_count and row_count:
                 try:
                     null_rate = round((float(null_count or 0) / row_count) * 100, 1)
@@ -879,40 +902,94 @@ async def build_quality_overview(  # noqa: C901
                     )
 
             if _is_numeric_dtype(dtype) and not _looks_like_identifier(column_name):
-                stats = await _get_numeric_stats(run_sql, table_name, column_name)
-                if stats:
-                    min_value = stats.get("min")
-                    max_value = stats.get("max")
-                    avg_value = stats.get("avg")
-                    if min_value == max_value and min_value is not None:
-                        numeric_flags.append(
-                            {
-                                "table": table_name,
-                                "column": column_name,
-                                "issue": f"constant numeric value ({min_value})",
-                            }
-                        )
-                    elif avg_value in {min_value, max_value} and min_value != max_value:
-                        numeric_flags.append(
-                            {
-                                "table": table_name,
-                                "column": column_name,
-                                "issue": f"average sits on boundary ({avg_value})",
-                            }
-                        )
+                min_value = stats.get("min")
+                max_value = stats.get("max")
+                avg_value = stats.get("avg")
+                if min_value == max_value and min_value is not None:
+                    numeric_flags.append(
+                        {
+                            "table": table_name,
+                            "column": column_name,
+                            "issue": f"constant numeric value ({min_value})",
+                        }
+                    )
+                elif avg_value in {min_value, max_value} and min_value != max_value:
+                    numeric_flags.append(
+                        {
+                            "table": table_name,
+                            "column": column_name,
+                            "issue": f"average sits on boundary ({avg_value})",
+                        }
+                    )
             elif _is_date_dtype(dtype):
                 coverage = await _get_date_coverage(run_sql, table_name, column_name)
                 if coverage:
                     date_columns.append(coverage)
 
+            if deep and _is_text_dtype(dtype):
+                from datasight.cleanup import (
+                    empty_string_preview,
+                    whitespace_preview,
+                )
+
+                ws = stats.get("whitespace_count")
+                empty = stats.get("empty_count")
+                if ws:
+                    text_flags.append(
+                        {
+                            "table": table_name,
+                            "column": column_name,
+                            "issue": "leading/trailing whitespace",
+                            "count": int(ws),
+                            "cleanup_sql": whitespace_preview(
+                                table_name, column_name, sql_dialect
+                            ),
+                        }
+                    )
+                if empty:
+                    text_flags.append(
+                        {
+                            "table": table_name,
+                            "column": column_name,
+                            "issue": "empty string used in place of NULL",
+                            "count": int(empty),
+                            "cleanup_sql": empty_string_preview(
+                                table_name, column_name, sql_dialect
+                            ),
+                        }
+                    )
+
+        if deep:
+            duplicate_rows.extend(
+                await _detect_whole_row_duplicates(run_sql, table_name, sql_dialect)
+            )
+            pk_duplicates.extend(
+                await _detect_pk_duplicates(run_sql, table_name, columns, sql_dialect)
+            )
+            outlier_flags.extend(
+                await _detect_numeric_outliers(run_sql, table_name, columns, sql_dialect)
+            )
+            orphan_flags.extend(
+                await _detect_orphan_fks(run_sql, table_name, columns, parent_keys, sql_dialect)
+            )
+
     if not null_columns:
         notes.append("No null-heavy columns detected in the sampled profiling pass.")
     if not date_columns:
         notes.append("No obvious date columns detected for freshness checks.")
     if not numeric_flags:
         notes.append("No obviously degenerate numeric ranges detected.")
+    if deep:
+        if not duplicate_rows and not pk_duplicates:
+            notes.append("No duplicate rows or duplicate primary-key-shaped values detected.")
+        if not text_flags:
+            notes.append("No text-cleanliness issues (whitespace, empty strings) detected.")
+        if not outlier_flags:
+            notes.append("No IQR outliers detected in numeric columns.")
+        if not orphan_flags:
+            notes.append("No orphan foreign-key-shaped values detected.")
 
-    return {
+    result: dict[str, Any] = {
         "table_count": len(schema_info),
         "null_columns": sorted(
             null_columns,
@@ -923,6 +1000,334 @@ async def build_quality_overview(  # noqa: C901
         "date_columns": date_columns[:6],
         "notes": notes,
     }
+    if deep:
+        result["deep"] = True
+        result["duplicate_rows"] = duplicate_rows
+        result["pk_duplicates"] = pk_duplicates[:8]
+        result["text_flags"] = text_flags[:12]
+        result["outlier_flags"] = outlier_flags[:12]
+        result["orphan_flags"] = orphan_flags[:12]
+    return result
+
+
+# ---------------------------------------------------------------------------
+# Batched per-table column scan
+# ---------------------------------------------------------------------------
+
+
+async def _batched_column_scan(
+    run_sql: RunSql,
+    table_name: str,
+    columns: list[dict[str, Any]],
+    *,
+    deep: bool,
+    sql_dialect: str,
+) -> dict[str, Any]:
+    """Scan every column of ``table_name`` with one aggregate ``SELECT``.
+
+    The result maps each column name to a dict that may hold ``null_count``,
+    ``min``, ``max``, ``avg``, ``whitespace_count`` and ``empty_count``; a key
+    is absent when that check did not apply to the column. ``COUNT(*)`` is
+    stored under ``__row_count``. A failed or empty query yields ``{}``.
+    ``sql_dialect`` is accepted but not consulted here.
+    """
+    projections: list[str] = ["COUNT(*) AS __row_count"]
+    plans: list[tuple[int, dict[str, Any]]] = []
+
+    for pos, col in enumerate(columns):
+        cname = col["name"]
+        ctype = col.get("dtype", "")
+        ident = _quote_identifier(cname)
+        projections.append(f"COUNT({ident}) AS nn_{pos}")
+        # Identifier-shaped numeric columns get no min/max/avg projection.
+        wants_numeric = bool(_is_numeric_dtype(ctype)) and not _looks_like_identifier(cname)
+        if wants_numeric:
+            projections.extend(
+                f"{func}({ident}) AS {alias}_{pos}"
+                for func, alias in (("MIN", "mn"), ("MAX", "mx"), ("AVG", "av"))
+            )
+        # Text cleanliness counters are only requested in deep mode.
+        wants_text = bool(deep and _is_text_dtype(ctype))
+        if wants_text:
+            projections.append(
+                f"SUM(CASE WHEN {ident} IS NOT NULL AND {ident} <> TRIM({ident}) "
+                f"THEN 1 ELSE 0 END) AS ws_{pos}"
+            )
+            projections.append(f"SUM(CASE WHEN {ident} = '' THEN 1 ELSE 0 END) AS em_{pos}")
+        plan: dict[str, Any] = {"name": cname, "is_numeric": wants_numeric, "is_text": wants_text}
+        plans.append((pos, plan))
+
+    query = f"SELECT {', '.join(projections)} FROM {_quote_identifier(table_name)}"
+    try:
+        frame = await run_sql(query)
+    except Exception as exc:
+        logger.debug(f"Batched column scan failed for {table_name}: {exc}")
+        return {}
+    if frame.empty:
+        return {}
+
+    stats = frame.iloc[0]
+    total_rows = _to_int_or_none(stats.get("__row_count"))
+    result: dict[str, Any] = {"__row_count": total_rows}
+    for pos, plan in plans:
+        entry: dict[str, Any] = {}
+        non_null = _to_int_or_none(stats.get(f"nn_{pos}"))
+        if non_null is not None and total_rows is not None:
+            entry["null_count"] = max(0, total_rows - non_null)
+        if plan["is_numeric"]:
+            for key, alias in (("min", "mn"), ("max", "mx"), ("avg", "av")):
+                entry[key] = _scalar_or_none(stats.get(f"{alias}_{pos}"))
+        if plan["is_text"]:
+            entry["whitespace_count"] = _to_int_or_none(stats.get(f"ws_{pos}")) or 0
+            entry["empty_count"] = _to_int_or_none(stats.get(f"em_{pos}")) or 0
+        result[plan["name"]] = entry
+    return result
+
+
+def _scalar_or_none(value: Any) -> str | None:
+    """Stringify a SQL scalar, mapping missing values to ``None``.
+
+    ``None`` and float NaN count as missing; anything else goes through ``str``.
+    """
+    is_missing = value is None or (isinstance(value, float) and math.isnan(value))
+    return None if is_missing else str(value)
+
+
+# ---------------------------------------------------------------------------
+# Deep detectors
+# ---------------------------------------------------------------------------
+
+
+def _index_pk_shaped_columns(
+    schema_info: list[dict[str, Any]],
+) -> dict[str, tuple[str, str]]:
+    """Index tables that have exactly one identifier-shaped column.
+
+    Keys are lower-cased table names; values are ``(table, column)`` pairs in
+    their original spelling. The orphan-FK detector uses this mapping to find
+    a parent table for a child column.
+    """
+    index: dict[str, tuple[str, str]] = {}
+    for entry in schema_info:
+        owner = entry["name"]
+        names = (col["name"] for col in entry.get("columns", []))
+        id_shaped = [n for n in names if _looks_like_identifier(n)]
+        if len(id_shaped) != 1:
+            continue
+        index[owner.lower()] = (owner, id_shaped[0])
+    return index
+
+
+async def _detect_whole_row_duplicates(
+    run_sql: RunSql, table_name: str, sql_dialect: str
+) -> list[dict[str, Any]]:
+    """Report how many rows of ``table_name`` repeat another identical row.
+
+    The count is ``COUNT(*)`` minus the number of ``SELECT DISTINCT *`` rows.
+    Returns a single finding with previewable cleanup SQL, or ``[]`` when the
+    count is zero, the result is empty, or the query raises.
+    """
+    quoted_table = _quote_identifier(table_name)
+    # DuckDB gets the derived table without an alias; other dialects get ``AS _d``.
+    alias_clause = "" if sql_dialect == "duckdb" else " AS _d"
+    sql = (
+        f"SELECT (SELECT COUNT(*) FROM {quoted_table}) - "
+        f"(SELECT COUNT(*) FROM (SELECT DISTINCT * FROM {quoted_table}){alias_clause}) "
+        f"AS dup_count"
+    )
+    try:
+        frame = await run_sql(sql)
+    except Exception as exc:
+        logger.debug(f"Whole-row duplicate check failed for {table_name}: {exc}")
+        return []
+    if frame.empty:
+        return []
+    duplicate_count = _to_int_or_none(frame.iloc[0].get("dup_count"))
+    if not duplicate_count:
+        return []
+    # Deferred import: reached only when there is a finding to render.
+    from datasight.cleanup import whole_row_dedup_preview
+
+    finding: dict[str, Any] = {
+        "table": table_name,
+        "duplicate_count": duplicate_count,
+        "cleanup_sql": whole_row_dedup_preview(table_name, sql_dialect),
+    }
+    return [finding]
+
+
+async def _detect_pk_duplicates(
+    run_sql: RunSql,
+    table_name: str,
+    columns: list[dict[str, Any]],
+    sql_dialect: str,
+) -> list[dict[str, Any]]:
+    """Find identifier-shaped columns that hold repeated non-null values."""
+    results: list[dict[str, Any]] = []
+    quoted_table = _quote_identifier(table_name)
+    for col_name in (c["name"] for c in columns if _looks_like_identifier(c["name"])):
+        quoted_col = _quote_identifier(col_name)
+        # At most five repeated values are fetched, most frequent first.
+        sql = (
+            f"SELECT {quoted_col} AS value, COUNT(*) AS n FROM {quoted_table} "
+            f"WHERE {quoted_col} IS NOT NULL "
+            f"GROUP BY {quoted_col} HAVING COUNT(*) > 1 "
+            f"ORDER BY COUNT(*) DESC LIMIT 5"
+        )
+        try:
+            frame = await run_sql(sql)
+        except Exception as exc:
+            logger.debug(f"PK duplicate check failed for {table_name}.{col_name}: {exc}")
+            continue
+        if frame.empty:
+            continue
+        examples = [
+            {"value": str(rec["value"]), "count": int(rec["n"])} for _, rec in frame.iterrows()
+        ]
+        # Deferred import: reached only when a column has duplicates.
+        from datasight.cleanup import pk_dedup_preview
+
+        finding: dict[str, Any] = {
+            "table": table_name,
+            "column": col_name,
+            "examples": examples,
+            "cleanup_sql": pk_dedup_preview(table_name, col_name, sql_dialect),
+        }
+        results.append(finding)
+    return results
+
+
+async def _detect_numeric_outliers(
+    run_sql: RunSql,
+    table_name: str,
+    columns: list[dict[str, Any]],
+    sql_dialect: str,
+) -> list[dict[str, Any]]:
+    """Count values lying outside the 1.5 * IQR fence of each numeric column.
+
+    Returns ``[]`` immediately for SQLite. Non-numeric and identifier-shaped
+    columns are not queried. Dialect ``"postgres"`` uses ``percentile_cont``;
+    every other dialect falls through to the ``quantile_cont`` form.
+    """
+    if sql_dialect == "sqlite":
+        return []
+    results: list[dict[str, Any]] = []
+    quoted_table = _quote_identifier(table_name)
+    # Quartile expression template; ``{col}`` and ``{p}`` are filled per column.
+    if sql_dialect == "postgres":
+        quartile = "percentile_cont({p}) WITHIN GROUP (ORDER BY {col})"
+    else:  # duckdb
+        quartile = "quantile_cont({col}, {p})"
+    for column in columns:
+        col_name = column["name"]
+        if not _is_numeric_dtype(column.get("dtype", "")) or _looks_like_identifier(col_name):
+            continue
+        quoted_col = _quote_identifier(col_name)
+        lower_q = quartile.format(p="0.25", col=quoted_col)
+        upper_q = quartile.format(p="0.75", col=quoted_col)
+        sql = (
+            f"WITH q AS (SELECT {lower_q} AS q1, {upper_q} AS q3 FROM {quoted_table}) "
+            f"SELECT q.q1 AS q1, q.q3 AS q3, "
+            f"(SELECT COUNT(*) FROM {quoted_table}, q "
+            f"WHERE {quoted_col} IS NOT NULL "
+            f"AND ({quoted_col} < q.q1 - 1.5 * (q.q3 - q.q1) "
+            f"OR {quoted_col} > q.q3 + 1.5 * (q.q3 - q.q1))) AS outlier_count "
+            f"FROM q"
+        )
+        try:
+            frame = await run_sql(sql)
+        except Exception as exc:
+            logger.debug(f"Outlier check failed for {table_name}.{col_name}: {exc}")
+            continue
+        if frame.empty:
+            continue
+        first = frame.iloc[0]
+        outlier_count = _to_int_or_none(first.get("outlier_count"))
+        if not outlier_count:
+            continue
+        # Quartiles are handed on as strings (or None) for the cleanup preview.
+        q1, q3 = (_scalar_or_none(first.get(key)) for key in ("q1", "q3"))
+        from datasight.cleanup import outlier_preview
+
+        results.append(
+            {
+                "table": table_name,
+                "column": col_name,
+                "outlier_count": outlier_count,
+                "q1": q1,
+                "q3": q3,
+                "cleanup_sql": outlier_preview(table_name, col_name, q1, q3, sql_dialect),
+            }
+        )
+    return results
+
+
+async def _detect_orphan_fks(  # noqa: C901
+    run_sql: RunSql,
+    table_name: str,
+    columns: list[dict[str, Any]],
+    parent_keys: dict[str, tuple[str, str]],
+    sql_dialect: str,
+) -> list[dict[str, Any]]:
+    """For columns shaped like ``<parent>_id``, count values not in parent.
+
+    Bare ``id``/``_id``/``id_`` columns (no parent stem) and self-references
+    (parent == child) are skipped, as are columns whose query raises.
+    """
+    findings: list[dict[str, Any]] = []
+    qt = _quote_identifier(table_name)
+    for column in columns:
+        name = column["name"]
+        if not _looks_like_identifier(name):
+            continue
+        parent_lookup = name.lower()
+        if parent_lookup.endswith("_id"):
+            parent_lookup = parent_lookup[:-3]
+        elif parent_lookup.startswith("id_"):
+            parent_lookup = parent_lookup[3:]
+        elif parent_lookup == "id":
+            parent_lookup = ""
+        if not parent_lookup:
+            # No parent stem: "" would otherwise match a table literally named "s".
+            continue
+        # Try both singular and a naive pluralization.
+        candidates = [parent_lookup, parent_lookup + "s"]
+        match = next((parent_keys[c] for c in candidates if c in parent_keys), None)
+        if not match:
+            continue
+        parent_table, parent_column = match
+        if parent_table.lower() == table_name.lower():
+            continue
+        qc = _quote_identifier(name)
+        qpt = _quote_identifier(parent_table)
+        qpc = _quote_identifier(parent_column)
+        sql = (
+            f"SELECT COUNT(DISTINCT {qc}) AS orphan_count FROM {qt} "
+            f"WHERE {qc} IS NOT NULL "
+            f"AND {qc} NOT IN (SELECT {qpc} FROM {qpt} WHERE {qpc} IS NOT NULL)"
+        )
+        try:
+            df = await run_sql(sql)
+        except Exception as exc:
+            logger.debug(f"Orphan FK check failed for {table_name}.{name}: {exc}")
+            continue
+        count = None if df.empty else _to_int_or_none(df.iloc[0].get("orphan_count"))
+        if not count:
+            continue
+        from datasight.cleanup import orphan_fk_preview
+
+        cleanup_sql = orphan_fk_preview(table_name, name, parent_table, parent_column, sql_dialect)
+        findings.append(
+            {
+                "table": table_name,
+                "column": name,
+                "parent_table": parent_table,
+                "parent_column": parent_column,
+                "orphan_count": count,
+                "cleanup_sql": cleanup_sql,
+            }
+        )
+    return findings
 
 
 async def build_trend_overview(
diff --git a/src/datasight/web/app.py b/src/datasight/web/app.py
index 62f80ae..89644ca 100644
--- a/src/datasight/web/app.py
+++ b/src/datasight/web/app.py
@@ -1979,7 +1979,9 @@ async def get_quality_overview(table: str | None = None, state: AppState = Depen
     overview, cached = await _get_cached_insight(
         state,
         cache_key,
-        lambda: build_quality_overview(schema_info, sql_runner.run_sql),
+        lambda: build_quality_overview(
+            schema_info, sql_runner.run_sql, sql_dialect=state.sql_dialect
+        ),
     )
     return {"overview": overview, "cached": cached}
 
diff --git a/tests/test_cli_tools.py b/tests/test_cli_tools.py
index 500620f..c732b3c 100644
--- a/tests/test_cli_tools.py
+++ b/tests/test_cli_tools.py
@@ -184,6 +184,162 @@ def test_quality_markdown_output_writes_file(project_dir, tmp_path):
     assert "## Date Coverage" in text
 
 
+def _fake_deep_quality_data() -> dict:
+    """Canned ``build_quality_overview`` deep-mode payload; every deep list is populated."""
+    return {
+        "table_count": 2,
+        "null_columns": [
+            {"table": "orders", "column": "notes", "null_count": 9, "null_rate": 90.0}
+        ],
+        "numeric_flags": [
+            {"table": "orders", "column": "qty", "issue": "constant numeric value (1)"}
+        ],
+        "date_columns": [
+            {"table": "orders", "column": "order_date", "min": "2024-01-01", "max": "2024-12-31"}
+        ],
+        "notes": ["All checks ran."],
+        "deep": True,  # the keys below are the ones a deep run adds to the result
+        "duplicate_rows": [
+            {
+                "table": "orders",
+                "duplicate_count": 3,
+                "cleanup_sql": 'SELECT DISTINCT * FROM "orders";',
+            }
+        ],
+        "pk_duplicates": [
+            {
+                "table": "orders",
+                "column": "id",
+                "examples": [{"value": "7", "count": 2}, {"value": "9", "count": 2}],
+                "cleanup_sql": 'SELECT * FROM "orders" QUALIFY ROW_NUMBER() OVER (PARTITION BY "id" ORDER BY "id") = 1;',
+            }
+        ],
+        "text_flags": [
+            {
+                "table": "orders",
+                "column": "region",
+                "issue": "leading/trailing whitespace",
+                "count": 4,
+                "cleanup_sql": 'SELECT "region" AS original, TRIM("region") AS trimmed FROM "orders";',
+            },
+            {
+                "table": "orders",
+                "column": "region",
+                "issue": "empty string used in place of NULL",
+                "count": 2,
+                "cleanup_sql": 'SELECT * FROM "orders" WHERE "region" = \'\';',
+            },
+        ],
+        "outlier_flags": [
+            {
+                "table": "orders",
+                "column": "amount",
+                "outlier_count": 11,
+                "q1": "10.0",
+                "q3": "30.0",
+                "cleanup_sql": (
+                    '-- Rows in \'amount\' outside the IQR fence [q1=10.0, q3=30.0].\n'
+                    'SELECT * FROM "orders" WHERE "amount" IS NOT NULL;'
+                ),
+            }
+        ],
+        "orphan_flags": [
+            {
+                "table": "orders",
+                "column": "product_id",
+                "parent_table": "products",
+                "parent_column": "id",
+                "orphan_count": 2,
+                "cleanup_sql": (
+                    "SELECT DISTINCT \"product_id\" FROM \"orders\" "
+                    "WHERE \"product_id\" NOT IN (SELECT \"id\" FROM \"products\");"
+                ),
+            }
+        ],
+    }
+
+
+def test_render_quality_markdown_deep_sections():
+    """The markdown report for the canned deep payload shows every section and its cleanup SQL."""
+    from datasight.cli import render_quality_markdown
+
+    md = render_quality_markdown(_fake_deep_quality_data())
+    assert "## Whole-Row Duplicates" in md
+    assert "3 duplicate row(s)" in md
+    assert "## Primary-Key-Shaped Duplicates" in md
+    assert "7 (×2)" in md
+    assert "## Text Cleanliness" in md
+    assert "leading/trailing whitespace" in md
+    assert "empty string used in place of NULL" in md
+    assert "## Numeric Outliers (IQR)" in md
+    assert "11 row(s) outside" in md
+    assert "## Orphan Foreign-Key-Shaped Values" in md
+    assert "products.id" in md
+    assert "## Suggested Cleanup" in md
+    # Each cleanup SQL block is introduced by its own level-3 heading.
+    assert "### orders (whole-row dedup)" in md
+    assert "### orders.id (PK dedup)" in md
+    assert "### orders.region (leading/trailing whitespace)" in md
+    assert "### orders.amount (outliers)" in md
+    assert "### orders.product_id (orphans → products)" in md
+    # Expect at least five ```sql fences (one per heading asserted above).
+    assert md.count("```sql") >= 5
+
+
+def test_quality_cli_deep_renders_all_sections(project_dir, monkeypatch):
+    """``quality --deep`` prints every deep section title and the cleanup panel."""
+    from datasight.cli_commands import quality as quality_cmd
+
+    async def fake_overview(schema_info, run_sql, **kwargs):  # noqa: ARG001
+        assert kwargs.get("deep") is True
+        assert kwargs.get("sql_dialect") in {"duckdb", "sqlite", "postgres"}
+        return _fake_deep_quality_data()
+
+    monkeypatch.setattr(quality_cmd, "build_quality_overview", fake_overview)
+    result = CliRunner().invoke(cli, ["quality", "--project-dir", project_dir, "--deep"])
+    assert result.exit_code == 0, result.output
+    for expected in (
+        "Whole-Row Duplicates",
+        "Primary-Key-Shaped Duplicates",
+        "Text Cleanliness",
+        "Numeric Outliers",
+        "Orphan Foreign-Key-Shaped Values",
+        "Suggested Cleanup",
+        # One of the previewed cleanup SQL snippets should appear in the panel.
+        "SELECT DISTINCT",
+    ):
+        assert expected in result.output
+
+
+def test_quality_cli_deep_markdown_output(project_dir, monkeypatch, tmp_path):
+    """``--deep --format markdown --output FILE`` writes the deep sections to FILE."""
+    from datasight.cli_commands import quality as quality_cmd
+
+    async def fake_overview(schema_info, run_sql, **_):  # noqa: ARG001
+        return _fake_deep_quality_data()
+
+    # Swap in the canned payload so the command never calls the real builder.
+    monkeypatch.setattr(quality_cmd, "build_quality_overview", fake_overview)
+    report_file = tmp_path / "deep.md"
+    cli_args = [
+        "quality",
+        "--project-dir",
+        project_dir,
+        "--deep",
+        "--format",
+        "markdown",
+        "--output",
+        str(report_file),
+    ]
+
+    result = CliRunner().invoke(cli, cli_args)
+    assert result.exit_code == 0, result.output
+    written = report_file.read_text(encoding="utf-8")
+    # Both the cleanup section and its first sub-heading must reach the file.
+    for heading in ("## Suggested Cleanup", "### orders (whole-row dedup)"):
+        assert heading in written
+
+
 def test_quality_json_output_writes_file(project_dir, tmp_path):
     output_path = tmp_path / "quality.json"
     runner = CliRunner()
diff --git a/tests/test_data_profile_extra.py b/tests/test_data_profile_extra.py
index 97b0cf6..e876241 100644
--- a/tests/test_data_profile_extra.py
+++ b/tests/test_data_profile_extra.py
@@ -511,3 +511,213 @@ async def test_build_dimension_overview(energy_conn):
     out = await build_dimension_overview(_schema_info(), _rs(energy_conn))
     assert "dimension_columns" in out
     assert "join_hints" in out
+
+
+# ---------------------------------------------------------------------------
+# Deep-mode quality checks
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def messy_conn(tmp_path):
+    """Yield a DuckDB connection seeded so each deep detector has something to find."""
+    db = tmp_path / "m.duckdb"
+    conn = duckdb.connect(str(db))
+    conn.execute("CREATE TABLE plants (plant_id INTEGER, plant_name VARCHAR)")
+    conn.execute("INSERT INTO plants VALUES (1, 'Alpha'), (2, 'Beta'), (3, 'Gamma')")
+    conn.execute(
+        "CREATE TABLE generation (plant_id INTEGER, fuel_type VARCHAR, state VARCHAR, mwh DOUBLE)"
+    )
+    # 40 tightly clustered baseline rows: plant_id cycles 1-3, mwh runs 100.0 to 139.0.
+    base_rows = [(i % 3 + 1, "coal", "CA", 100.0 + i) for i in range(40)]
+    # Whole-row duplicate (entire row appears twice)
+    base_rows.append((1, "gas", "OR", 50.0))
+    base_rows.append((1, "gas", "OR", 50.0))
+    # PK-shaped duplicate on plant_id (already true via base rows, but make
+    # sure at least one value is duplicated with differing other columns).
+    base_rows.append((4, "gas", "WA", 200.0))
+    base_rows.append((4, "gas", "TX", 210.0))
+    # Whitespace-padded and empty-string values, both in fuel_type.
+    base_rows.append((1, " coal ", "CA", 100.0))
+    base_rows.append((2, "", "CA", 100.0))
+    # Numeric outlier (well outside IQR).
+    base_rows.append((1, "coal", "CA", 99999.0))
+    # Orphan FK: plant_id=99 is not in plants (neither is plant_id=4 above).
+    base_rows.append((99, "coal", "CA", 120.0))
+    conn.executemany("INSERT INTO generation VALUES (?, ?, ?, ?)", base_rows)
+    yield conn
+    conn.close()
+
+
+def _messy_schema_info() -> list[dict[str, object]]:  # schema for the messy_conn tables
+    return [
+        {
+            "name": "plants",
+            "row_count": 3,
+            "columns": [
+                {"name": "plant_id", "dtype": "INTEGER"},
+                {"name": "plant_name", "dtype": "VARCHAR"},
+            ],
+        },
+        {
+            "name": "generation",
+            "row_count": 48,  # 40 baseline rows + 8 planted rows inserted by messy_conn
+            "columns": [
+                {"name": "plant_id", "dtype": "INTEGER"},
+                {"name": "fuel_type", "dtype": "VARCHAR"},
+                {"name": "state", "dtype": "VARCHAR"},
+                {"name": "mwh", "dtype": "DOUBLE"},
+            ],
+        },
+    ]
+
+
+@pytest.mark.asyncio
+async def test_quality_overview_shallow_omits_deep_keys(energy_conn):
+    overview = await build_quality_overview(_schema_info(), _rs(energy_conn))
+    # The default pass must not expose any deep-only result keys.
+    assert not {"duplicate_rows", "outlier_flags"} & overview.keys()
+    assert overview.get("deep") is not True
+
+
+@pytest.mark.asyncio
+async def test_quality_overview_deep_finds_everything(messy_conn):
+    """Each deep detector reports the defect planted for it in ``messy_conn``."""
+    overview = await build_quality_overview(
+        _messy_schema_info(),
+        _rs(messy_conn),
+        sql_dialect="duckdb",
+        deep=True,
+    )
+    assert overview.get("deep") is True
+
+    # Whole-row dup: the (1, gas, OR, 50) pair.
+    dup_rows = overview["duplicate_rows"]
+    assert any(d["table"] == "generation" and d["duplicate_count"] >= 1 for d in dup_rows)
+    assert all("cleanup_sql" in d for d in dup_rows)
+
+    # PK duplicates on generation.plant_id (multiple rows per plant_id).
+    pk_hits = [
+        f
+        for f in overview["pk_duplicates"]
+        if f["table"] == "generation" and f["column"] == "plant_id"
+    ]
+    assert pk_hits and pk_hits[0]["examples"]
+    assert "cleanup_sql" in pk_hits[0]
+
+    # Text flags: whitespace on fuel_type, empty string on fuel_type.
+    text_flags = overview["text_flags"]
+    seen_issues = {(f["column"], f["issue"]) for f in text_flags}
+    assert ("fuel_type", "leading/trailing whitespace") in seen_issues
+    assert ("fuel_type", "empty string used in place of NULL") in seen_issues
+    for flag in text_flags:
+        assert "cleanup_sql" in flag
+
+    # Outlier on mwh.
+    mwh_outliers = [f for f in overview["outlier_flags"] if f["column"] == "mwh"]
+    assert mwh_outliers and mwh_outliers[0]["outlier_count"] >= 1
+    assert "cleanup_sql" in mwh_outliers[0]
+
+    # Orphan FK: generation.plant_id=99 has no parent in plants.
+    orphan_hits = [
+        f
+        for f in overview["orphan_flags"]
+        if f["table"] == "generation" and f["column"] == "plant_id"
+    ]
+    assert orphan_hits and orphan_hits[0]["parent_table"] == "plants"
+    assert orphan_hits[0]["orphan_count"] >= 1
+    assert "cleanup_sql" in orphan_hits[0]
+
+
+@pytest.mark.asyncio
+async def test_quality_overview_batched_scan_single_query(messy_conn):
+    """A default pass issues exactly one ``__row_count`` scan per table."""
+    executed: list[str] = []
+
+    async def recording_run(sql):
+        executed.append(sql)
+        return await _rs(messy_conn)(sql)
+
+    await build_quality_overview(_messy_schema_info(), recording_run)
+    # Batched scans are recognised by their ``__row_count`` marker alias.
+    scan_count = sum("__row_count" in sql for sql in executed)
+    assert scan_count == 2  # one per table
+
+
+def test_cleanup_dedup_sql_dialects():
+    from datasight.cleanup import pk_dedup_preview, whole_row_dedup_preview
+
+    pk_keywords = {"duckdb": "QUALIFY", "postgres": "ROW_NUMBER", "sqlite": "rowid"}
+    for dialect, keyword in pk_keywords.items():
+        assert keyword in pk_dedup_preview("t", "id", dialect)
+    for dialect in ("duckdb", "sqlite"):
+        assert "DISTINCT" in whole_row_dedup_preview("t", dialect)
+
+
+def test_cleanup_text_and_outlier_and_orphan_previews():
+    from datasight.cleanup import (
+        empty_string_preview,
+        orphan_fk_preview,
+        outlier_preview,
+        whitespace_preview,
+    )
+
+    assert "= ''" in empty_string_preview("t", "c", "duckdb")
+    assert "TRIM" in whitespace_preview("t", "c", "duckdb")
+    # With known quartiles, both values appear as literals in the preview.
+    with_quartiles = outlier_preview("t", "c", "1.0", "9.0", "duckdb")
+    assert all(literal in with_quartiles for literal in ("1.0", "9.0"))
+    # Without quartiles the preview falls back to an ORDER BY query.
+    assert "ORDER BY" in outlier_preview("t", "c", None, None, "duckdb")
+    orphan_sql = orphan_fk_preview("child", "fk", "parent", "id", "duckdb")
+    assert all(token in orphan_sql for token in ("NOT IN", "parent"))
+
+
+@pytest.mark.asyncio
+async def test_deep_detectors_swallow_query_errors():
+    """A failing ``run_sql`` makes every deep detector return ``[]`` instead of raising."""
+    from datasight.data_profile import (
+        _detect_numeric_outliers,
+        _detect_orphan_fks,
+        _detect_pk_duplicates,
+        _detect_whole_row_duplicates,
+    )
+
+    async def failing_run(sql):  # noqa: ARG001
+        msg = "no such table"
+        raise RuntimeError(msg)
+
+    columns = [{"name": "plant_id", "dtype": "INTEGER"}, {"name": "mwh", "dtype": "DOUBLE"}]
+    parent_index = {"plants": ("plants", "plant_id")}
+    assert await _detect_whole_row_duplicates(failing_run, "t", "duckdb") == []
+    assert await _detect_pk_duplicates(failing_run, "t", columns, "duckdb") == []
+    assert await _detect_numeric_outliers(failing_run, "t", columns, "duckdb") == []
+    assert await _detect_orphan_fks(failing_run, "t", columns, parent_index, "duckdb") == []
+
+
+@pytest.mark.asyncio
+async def test_outlier_detector_skipped_on_sqlite():
+    from unittest.mock import AsyncMock
+
+    from datasight.data_profile import _detect_numeric_outliers
+
+    run_sql = AsyncMock()  # a raising stub proves nothing: the detector swallows Exception
+    cols = [{"name": "mwh", "dtype": "DOUBLE"}]
+    assert await _detect_numeric_outliers(run_sql, "t", cols, "sqlite") == []
+    run_sql.assert_not_awaited()
+
+
+@pytest.mark.asyncio
+async def test_orphan_detector_skips_self_and_unmatched(messy_conn):
+    """No finding when the parent is missing from the index or is the child table itself."""
+    from unittest.mock import AsyncMock
+
+    from datasight.data_profile import _detect_orphan_fks
+
+    cols = [{"name": "plant_id", "dtype": "INTEGER"}]
+    assert await _detect_orphan_fks(_rs(messy_conn), "generation", cols, {}, "duckdb") == []
+    # Self-reference: "plants" is the key plant_id resolves to, and it is the child here.
+    parents = {"plants": ("plants", "plant_id")}
+    run_sql = AsyncMock()
+    assert await _detect_orphan_fks(run_sql, "plants", cols, parents, "duckdb") == []
+    run_sql.assert_not_awaited()
diff --git a/tests/test_web_app.py b/tests/test_web_app.py
index e820dcc..74144e9 100644
--- a/tests/test_web_app.py
+++ b/tests/test_web_app.py
@@ -821,7 +821,7 @@ async def run_sql(self, sql):  # noqa: ARG002
     monkeypatch.setattr(
         web_app,
         "build_quality_overview",
-        lambda schema_info, run_sql: _fake_quality_overview(schema_info, run_sql),  # noqa: ARG005
+        lambda schema_info, run_sql, **_: _fake_quality_overview(schema_info, run_sql),  # noqa: ARG005
     )
 
     web_app._state.project_loaded = True
@@ -853,7 +853,7 @@ async def run_sql(self, sql):  # noqa: ARG002
     original_sql_runner = web_app._state.sql_runner
     original_insight_cache = dict(web_app._state._insight_cache)
 
-    async def fake_overview(schema_info, run_sql):  # noqa: ARG001
+    async def fake_overview(schema_info, run_sql, **_):  # noqa: ARG001
         captured["tables"] = [table["name"] for table in schema_info]
         return await _fake_quality_overview(schema_info, run_sql)